source: orange/Orange/OrangeWidgets/OWClusterOptimization.py @ 10466:b52f4c49e735

Revision 10466:b52f4c49e735, 89.8 KB checked in by Ales Erjavec <ales.erjavec@…>, 2 years ago (diff)

Added unicode filename support for the rest of save/load widget dialogs.

Line 
1from OWBaseWidget import *
2import os
3import orange, orngTest
4from copy import copy
5from math import sqrt
6import OWGUI, OWDlgs
7import orngVisFuncts
8import numpy
9
10VALUE = 0
11CLOSURE = 1
12VERTICES = 2
13ATTR_LIST = 3
14CLASS = 4
15ENL_CLOSURE = 5
16OTHER = 6
17STR_LIST = 7
18
19MUST_BE_INSIDE = 0  # values for self.conditionForArgument
20CAN_LIE_NEAR = 1
21
22BEST_GROUPS = 0
23BEST_GROUPS_IN_EACH_CLASS = 1
24
25contMeasures = [("None", None), ("ReliefF", orange.MeasureAttribute_relief(k=10, m=50)), ("Fisher discriminant", orngVisFuncts.MeasureFisherDiscriminant()), ("Signal to Noise Ratio", orngVisFuncts.S2NMeasure()), ("Signal to Noise Ratio For Each Class", orngVisFuncts.S2NMeasureMix())]
26discMeasures = [("None", None), ("ReliefF", orange.MeasureAttribute_relief(k=10, m=50)), ("Gain ratio", orange.MeasureAttribute_gainRatio()), ("Gini index", orange.MeasureAttribute_gini())]
27
28VALUE = 0
29CLUSTER = 1
30DISTANCE = 2
31
32OTHER_CLASS = 0
33OTHER_VALUE = 1
34OTHER_POINTS = 2
35OTHER_DISTANCE = 3
36OTHER_AVERAGE = 4
37OTHER_AVERAGE_DIST = 5
38
39
40class ClusterOptimization(OWBaseWidget):
41    EXACT_NUMBER_OF_ATTRS = 0
42    MAXIMUM_NUMBER_OF_ATTRS = 1
43
44    settingsList = ["resultListLen", "minExamples", "lastSaveDirName", "attrCont", "attrDisc", "showRank",
45                    "showValue", "jitterDataBeforeTriangulation", "useProjectionValue",
46                    "evaluationTime", "distributionScale", "removeDistantPoints", "useAlphaShapes", "alphaShapesValue",
47                    "argumentCountIndex", "evaluationTimeIndex", "conditionForArgument", "moreArgumentsIndex", "canUseMoreArguments",
48                    "parentWidget.clusterClassifierName"]
49    #resultsListLenNums = [ 100 ,  250 ,  500 ,  1000 ,  5000 , 10000, 20000, 50000, 100000, 500000 ]
50    resultsListLenNums = [ 100 ,  250 ,  500 ,  1000 ,  5000 , 10000]
51    resultsListLenList = [str(x) for x in resultsListLenNums]
52    argumentCounts = [1, 5, 10, 20, 40, 100, 100000]
53    evaluationTimeNums = [0.5, 1, 2, 5, 10, 20, 60, 120]
54    evaluationTimeList = [str(x) for x in evaluationTimeNums]
55
56    moreArgumentsNums = [60, 65, 70, 75, 80, 85, 90, 95]
57    moreArgumentsList = ["%d %%" % x for x in moreArgumentsNums]
58
59    def __init__(self, parentWidget = None, signalManager = None, graph = None, parentName = "Visualization widget"):
60        OWBaseWidget.__init__(self, None, signalManager, "Cluster Dialog")
61
62        self.parentWidget = parentWidget
63        self.parentName = parentName
64        self.setCaption("Cluster Dialog")
65        self.controlArea = QVBoxLayout(self)
66
67        self.graph = graph
68        self.minExamples = 0
69        self.resultListLen = 1000
70        self.maxResultListLen = self.resultsListLenNums[len(self.resultsListLenNums)-1]
71        self.onlyOnePerSubset = 1    # used in radviz and polyviz
72        self.lastSaveDirName = os.getcwd() + "/"
73        self.attrCont = 1
74        self.attrDisc = 1
75        self.rawData = None
76        self.subsetdata = None
77        self.arguments = []
78        self.selectedClasses = []
79        self.optimizationType = 1
80        self.jitterDataBeforeTriangulation = 0
81        self.parentWidget.clusterClassifierName = "Visual cluster classifier"
82
83        self.showRank = 0
84        self.showValue = 1
85        self.allResults = []
86        self.shownResults = []
87        self.attrLenDict = {}
88        self.datasetName = ""
89        self.dataset = None
90        self.cancelOptimization = 0
91        self.cancelArgumentation = 0
92        self.pointStability = None
93        self.pointStabilityCount = None
94        self.attributeCountIndex = 1
95        self.argumentationType = BEST_GROUPS
96
97        self.argumentationClassValue = 0
98        self.distributionScale = 1
99        self.considerDistance = 1
100        self.useProjectionValue = 0
101        self.evaluationTime = 30
102        self.useAlphaShapes = 1
103        self.alphaShapesValue = 1.5
104        self.removeDistantPoints = 1
105        self.evaluationTimeIndex = 4
106        self.conditionForArgument = 0
107        self.argumentCountIndex = 1     # when classifying use 10 best arguments
108        self.canUseMoreArguments = 0
109        self.moreArgumentsIndex = 4
110
111        self.loadSettings()
112
113        self.tabs = QTabWidget(self, 'tabWidget')
114        self.controlArea.addWidget(self.tabs)
115
116        self.MainTab = QVGroupBox(self)
117        self.SettingsTab = QVGroupBox(self)
118        self.ArgumentationTab = QVGroupBox(self)
119        self.ClassificationTab = QVGroupBox(self)
120        self.ManageTab = QVGroupBox(self)
121
122        self.tabs.insertTab(self.MainTab, "Main")
123        self.tabs.insertTab(self.SettingsTab, "Settings")
124        self.tabs.insertTab(self.ArgumentationTab, "Argumentation")
125        self.tabs.insertTab(self.ClassificationTab, "Classification")
126        self.tabs.insertTab(self.ManageTab, "Manage & Save")
127
128
129        # ###########################
130        # MAIN TAB
131        self.optimizationBox = OWGUI.widgetBox(self.MainTab, "Evaluate")
132        self.resultsBox = OWGUI.widgetBox(self.MainTab, "Projection list, most interesting first")
133        self.resultsDetailsBox = OWGUI.widgetBox(self.MainTab, "Shown details in projections list" , orientation = "horizontal")
134        self.buttonBox = OWGUI.widgetBox(self.optimizationBox, orientation = "horizontal")
135        self.label1 = QLabel('Projections with ', self.buttonBox)
136        self.optimizationTypeCombo = OWGUI.comboBox(self.buttonBox, self, "optimizationType", items = ["    exactly    ", "  maximum  "] )
137        self.attributeCountCombo = OWGUI.comboBox(self.buttonBox, self, "attributeCountIndex", items = [str(x) for x in range(3, 15)] + ["ALL"], tooltip = "Evaluate only projections with exactly (or maximum) this number of attributes")
138        self.attributeLabel = QLabel(' attributes', self.buttonBox)
139
140        self.startOptimizationButton = OWGUI.button(self.optimizationBox, self, "Start Evaluating Projections")
141        f = self.startOptimizationButton.font()
142        f.setBold(1)
143        self.startOptimizationButton.setFont(f)
144        self.stopOptimizationButton = OWGUI.button(self.optimizationBox, self, "Stop Evaluation", callback = self.stopOptimizationClick)
145        self.stopOptimizationButton.setFont(f)
146        self.stopOptimizationButton.hide()
147
148        self.resultList = OWGUI.listBox(self.resultsBox, self)
149        #self.resultList.setSelectionMode(QListWidget.ExtendedSelection)   # this would be nice if could be enabled, but it has a bug - currentItem doesn't return the correct value if this is on
150        self.resultList.setMinimumSize(200,200)
151
152        self.showRankCheck = OWGUI.checkBox(self.resultsDetailsBox, self, 'showRank', 'Rank', callback = self.updateShownProjections, tooltip = "Show projection ranks")
153        self.showValueCheck = OWGUI.checkBox(self.resultsDetailsBox, self, 'showValue', 'Cluster value', callback = self.updateShownProjections, tooltip = "Show the cluster value")
154
155
156        # ##########################
157        # SETTINGS TAB
158        self.jitteringBox = OWGUI.checkBox(self.SettingsTab, self, 'jitterDataBeforeTriangulation', 'Use data jittering', box = "Jittering options", tooltip = "Use jittering if you get an exception when evaluating clusters. \nIt adds a small random noise to poins which fixes the triangluation problems.")
159        self.clusterEvaluationBox = OWGUI.widgetBox(self.SettingsTab, "Cluster detection settings")
160        alphaBox = OWGUI.widgetBox(self.clusterEvaluationBox, orientation = "horizontal")
161        OWGUI.checkBox(alphaBox, self, "useAlphaShapes", "Use alpha shapes. Alpha value is :  ", tooltip = "Break separated clusters with same class value into subclusters", callback = self.updateGraph)
162        OWGUI.hSlider(alphaBox, self, "alphaShapesValue", minValue = 0.0, maxValue = 30, step = 1, callback = self.updateGraph, labelFormat="%.1f", ticks = 5, divideFactor = 10.0)
163        OWGUI.checkBox(self.clusterEvaluationBox, self, "removeDistantPoints", "Remove distant points", tooltip = "Remove points from the cluster boundary that lie closer to examples with different class value", callback = self.updateGraph)
164
165        valueBox = OWGUI.widgetBox(self.SettingsTab, "Cluster value computation")
166        self.distributionScaleCheck = OWGUI.checkBox(valueBox, self, "distributionScale", "Scale cluster values according to class distribution", tooltip = "Cluster value is (among other things) determined by the number of points inside the cluster. \nThis criteria is unfair in data sets with uneven class distributions.\nThis option takes this into an account by transforming the number of covered points into percentage of all points with the cluster class value.")
167        self.considerDistanceCheck = OWGUI.checkBox(valueBox, self, "considerDistance", "Consider distance between clusters", tooltip = "If checked, cluster value is defined also by the distance between the cluster points and nearest points that belong to a different class")
168
169        self.heuristicsSettingsBox = OWGUI.widgetBox(self.SettingsTab, "Heuristics for attribute ordering")
170        self.miscSettingsBox = OWGUI.widgetBox(self.SettingsTab, "Miscellaneous settings")
171
172        OWGUI.comboBox(self.heuristicsSettingsBox, self, "attrCont", box = "Ordering of continuous attributes", items = [val for (val, m) in contMeasures])
173        OWGUI.comboBox(self.heuristicsSettingsBox, self, "attrDisc", box = "Ordering of discrete attributes", items = [val for (val, m) in discMeasures])
174       
175        self.resultListCombo = OWGUI.comboBoxWithCaption(self.miscSettingsBox, self, "resultListLen", "Maximum length of projection list:"+"   ", tooltip = "Maximum length of the list of interesting projections. This is also the number of projections that will be saved if you click Save button.", items = self.resultsListLenNums, callback = self.updateShownProjections, sendSelectedValue = 1, valueType = int)
176        self.minTableLenEdit = OWGUI.lineEdit(self.miscSettingsBox, self, "minExamples", "Minimum examples in data set:"+"        ", orientation = "horizontal", tooltip = "Due to missing values, different subsets of attributes can have different number of examples. Projections with less than this number of examples will be ignored.", valueType = int)
177
178        # ##########################
179        # ARGUMENTATION tab
180        self.argumentationStartBox = OWGUI.widgetBox(self.ArgumentationTab, "Arguments")
181        self.findArgumentsButton = OWGUI.button(self.argumentationStartBox, self, "Find Arguments", callback = self.findArguments)
182        f = self.findArgumentsButton.font(); f.setBold(1);  self.findArgumentsButton.setFont(f)
183        self.stopArgumentationButton = OWGUI.button(self.argumentationStartBox, self, "Stop Searching", callback = self.stopArgumentationClick)
184        self.stopArgumentationButton.setFont(f)
185        self.stopArgumentationButton.hide()
186        self.classValueList = OWGUI.comboBox(self.ArgumentationTab, self, "argumentationClassValue", box = "Arguments for class:", tooltip = "Select the class value that you wish to see arguments for", callback = self.argumentationClassChanged)
187        self.argumentBox = OWGUI.widgetBox(self.ArgumentationTab, "Arguments for the selected class value")
188        self.argumentList = OWGUI.listBox(self.argumentBox, self)
189        self.argumentList.setMinimumSize(200,200)
190        self.connect(self.argumentList, SIGNAL("selectionChanged()"),self.argumentSelected)
191
192        # ##########################
193        # CLASSIFICATION TAB
194        self.classifierNameEdit = OWGUI.lineEdit(self.ClassificationTab, self, 'parentWidget.clusterClassifierName', box = ' Learner / Classifier Name ', tooltip='Name to be used by other widgets to identify your learner/classifier.')
195        self.useProjectionValueCheck = OWGUI.checkBox(self.ClassificationTab, self, "useProjectionValue", "Use projection score when voting", box = "Voting for class value", tooltip = "Does each projection count for 1 vote or is it dependent on the value of the projection")
196        OWGUI.comboBox(self.ClassificationTab, self, "argumentationType", box = "When searching for arguments consider ... ", items = ["... best evaluated groups", "... best groups for each class value"], tooltip = "When you wish to find arguments or classify an example, do you wish to search groups from the begining of the list\nor do you want to consider best groups for each class value. \nExplanation: For some class value evaluated groups might have significantly lower values than for other classes. \nIf you select 'best evaluated groups' you therefore won't even give a chance to this class value, \nsince its groups will be much lower in the list of evaluated groups.")
197        self.conditionCombo = OWGUI.comboBox(self.ClassificationTab, self, "conditionForArgument", box = "Condition for a cluster to be an argument for an example is that...", items = ["... the example lies inside the cluster", "... the example lies inside or near the cluster"], tooltip = "When searching for arguments or classifying an example we have to define when can a detected cluster be an argument for a class.\nDoes the point being classified have to lie inside that cluster or is it enough that it lies near it.\nIf nearness is enough than the point can be away from the cluster for the distance that is defined as an average distance between points inside the cluster.")
198        self.evaluationTimeEdit = OWGUI.comboBoxWithCaption(self.ClassificationTab, self, "evaluationTimeIndex", "Time for evaluating projections (minutes): ", box = "Evaluating time", tooltip = "The maximum time that the classifier is allowed for evaluating projections (learning)", items = self.evaluationTimeList)
199        projCountBox = OWGUI.widgetBox(self.ClassificationTab, "Argument count")
200        self.argumentCountEdit = OWGUI.comboBoxWithCaption(projCountBox, self, "argumentCountIndex", "Maximum number of arguments used when classifying: ", tooltip = "The maximum number of arguments that will be used when classifying an example.", items = ["1", "5", "10", "20", "40", "100", "All"])
201        projCountBox2 = OWGUI.widgetBox(projCountBox, orientation = "horizontal")
202        self.canUseMoreArgumentsCheck = OWGUI.checkBox(projCountBox2, self, "canUseMoreArguments", "Use additional projections until probability at least: ", tooltip = "If checked, it will allow the classifier to use more arguments when it is not confident enough in the prediction.\nIt will use additional arguments until the predicted probability of one class value will be at least as much as specified in the combo box")
203        self.moreArgumentsCombo = OWGUI.comboBox(projCountBox2, self, "moreArgumentsIndex", items = self.moreArgumentsList, tooltip = "If checked, it will allow the classifier to use more arguments when it is not confident enough in the prediction.\nIt will use additional arguments until the predicted probability of one class value will be at least as much as specified in the combo box")
204
205        # ##########################
206        # SAVE & MANAGE TAB
207        self.classesBox = OWGUI.widgetBox(self.ManageTab, "Select class values you wish to separate")
208        self.manageResultsBox = OWGUI.widgetBox(self.ManageTab, "Number of concurrently visualized attributes")
209        self.manageBox = OWGUI.widgetBox(self.ManageTab, "Manage projections")
210
211        self.classesList = OWGUI.listBox(self.classesBox, selectionMode = QListWidget.MultiSelection, callback = self.updateShownProjections)
212        self.classesList.setMinimumSize(60,60)
213
214        self.buttonBox6 = OWGUI.widgetBox(self.manageBox, orientation = "horizontal")
215        OWGUI.button(self.buttonBox6, self, "Load", self.load)
216        OWGUI.button(self.buttonBox6, self, "Save", self.save)
217
218        #self.buttonBox7 = OWGUI.widgetBox(self.manageBox, orientation = "horizontal")
219        #OWGUI.button(self.buttonBox7, self, "Graph projections", self.graphProjectionQuality)
220        #OWGUI.button(self.buttonBox7, self, "Interaction Analysis", self.interactionAnalysis)
221
222        self.buttonBox3 = OWGUI.widgetBox(self.manageBox, orientation = "horizontal")
223        self.clusterStabilityButton = OWGUI.button(self.buttonBox3, self, 'Show cluster stability', self.evaluatePointsInClusters)
224        self.clusterStabilityButton.setCheckable(1)
225        #self.saveProjectionButton = OWGUI.button(self.buttonBox3, self, 'Save projection')
226        OWGUI.button(self.buttonBox3, self, "Save Best Graphs", self.exportMultipleGraphs)
227
228        OWGUI.button(self.manageBox, self, "Clear Results", self.clearResults)
229
230        self.attrLenList = OWGUI.listBox(self.manageResultsBox, self, selectionMode = QListWidget.MultiSelection, callback = self.attrLenListChanged)
231        self.attrLenList.setMinimumSize(60,60)
232
233        self.resize(375,550)
234        self.setMinimumWidth(375)
235        self.tabs.setMinimumWidth(375)
236
237        self.statusBar = QStatusBar(self)
238        self.controlArea.addWidget(self.statusBar)
239        self.controlArea.activate()
240
241        self.connect(self.classifierNameEdit, SIGNAL("textChanged(const QString &)"), self.changeLearnerName)
242        if self.parentWidget:
243            if hasattr(self.parentWidget, "learnersArray"):
244                self.parentWidget.learnersArray[1] = clusterLearner(self, self.parentWidget)
245            else:
246                self.clusterLearner = clusterLearner(self, self.parentWidget)
247                self.parentWidget.send("Cluster learner", self.clusterLearner)
248
249
250    # ##############################################################
251    # EVENTS
252    # ##############################################################
253    # when text of vizrank or cluster learners change update their name
254    def changeLearnerName(self, text):
255        if self.parentWidget:
256            if hasattr(self.parentWidget, "learnersArray"):
257                self.parentWidget.learnersArray[1].name = self.parentWidget.clusterClassifierName
258            else:
259                self.clusterLearner.name = self.parentWidget.clusterClassifierName
260        else: print "there is no instance of Cluster Learner"
261
262    def updateGraph(self):
263        if self.parentWidget: self.parentWidget.updateGraph()
264
265    # result list can contain projections with different number of attributes
266    # user clicked in the listbox that shows possible number of attributes of result list
267    # result list must be updated accordingly
268    def attrLenListChanged(self, *args):
269        # check which attribute lengths do we want to show
270        self.attrLenDict = {}
271        for i in range(self.attrLenList.count()):
272            intVal = int(str(self.attrLenList.item(i).text()))
273            selected = self.attrLenList.item(i).isSelected()
274            self.attrLenDict[intVal] = selected
275        self.updateShownProjections()
276
277    def clearResults(self):
278        del self.allResults; self.allResults = []
279        del self.shownResults; self.shownResults = []
280        self.resultList.clear()
281        self.attrLenDict = {}
282        self.attrLenList.clear()
283        del self.pointStability; self.pointStability = None
284
285    def clearArguments(self):
286        del self.arguments; self.arguments = []
287        self.argumentList.clear()
288
289    def getSelectedClassValues(self):
290        selectedClasses = []
291        for i in range(self.classesList.count()-1):
292            if self.classesList.item(i).isSelected(): selectedClasses.append(i)
293        if self.classesList.item(self.classesList.count()-1).isSelected():
294            selectedClasses.append(-1)      # "all clusters" is represented by -1 in the selectedClasses array
295        return selectedClasses
296
297    # ##############################################################
298    # ##############################################################
299    def evaluateClusters(self, data):
300        import orangeom
301        graph = orangeom.triangulate(data,0,1,3)
302        graph.returnIndices = 1
303        computeDistances(graph)
304        removeEdgesToDifferentClasses(graph, -3)    # None
305        removeSingleLines(graph, None, None, None, 0, -1)
306        edgesDict, clusterDict, verticesDict, count = enumerateClusters(graph, 0)
307
308        closureDict = computeClosure(graph, edgesDict, 1, 1, 1, -3, -4)   # find points that define the edge of a cluster with one class value. these points used when we search for nearest points of a specific cluster
309        #bigPolygonVerticesDict = copy(verticesDict)  # compute which points lie in which cluster
310        otherClassDict = getPointsWithDifferentClassValue(graph, closureDict)   # this dict is used when evaluating a cluster to find nearest points that belong to a different class
311        removeSingleTrianglesAndLines(graph, edgesDict, clusterDict, verticesDict, -1)
312        #bigPolygonVerticesDict = copy(verticesDict)
313
314        if self.useAlphaShapes:
315            computeAlphaShapes(graph, edgesDict, self.alphaShapesValue / 10.0, -1, 0)          # try to break too empty clusters into more clusters
316            del edgesDict, clusterDict, verticesDict
317            edgesDict, clusterDict, verticesDict, count = enumerateClusters(graph, 1)    # create the new clustering
318            fixDeletedEdges(graph, edgesDict, clusterDict, 0, 1, -1)  # None             # restore edges that were deleted with computeAlphaShapes and did not result breaking the cluster
319        del closureDict
320        closureDict = computeClosure(graph, edgesDict, 1, 1, 2, -3, -4)
321
322        if self.removeDistantPoints:
323            removeDistantPointsFromClusters(graph, edgesDict, clusterDict, verticesDict, closureDict, -2)
324        removeSingleLines(graph, edgesDict, clusterDict, verticesDict, 1, -1)   # None
325        del edgesDict, clusterDict, verticesDict
326        edgesDict, clusterDict, verticesDict, count = enumerateClusters(graph, 1)     # reevaluate clusters - now some clusters might have disappeared
327        removeSingleTrianglesAndLines(graph, edgesDict, clusterDict, verticesDict, -1)
328        # add edges that were removed by removing single points with different class value
329        del closureDict
330        closureDict = computeClosure(graph, edgesDict, 1, 1, 2, -3, -2)
331        polygonVerticesDict = verticesDict  # compute which points lie in which cluster
332
333        aveDistDict = computeAverageDistance(graph, edgesDict)
334
335        # compute areas of all found clusters
336        #areaDict = computeAreas(graph, edgesDict, clusterDict, verticesDict, closureDict, 2)
337
338        # create a list of vertices that lie on the boundary of all clusters - used to determine the distance to the examples with different class
339        allOtherClass = []
340        for key in otherClassDict.keys():
341            allOtherClass += otherClassDict[key]
342        allOtherClass.sort()
343        for i in range(len(allOtherClass)-1)[::-1]:
344            if allOtherClass[i] == allOtherClass[i+1]: allOtherClass.remove(allOtherClass[i])
345
346        valueDict = {}
347        otherDict = {}
348        enlargedClosureDict = {}
349        for key in closureDict.keys():
350            if len(polygonVerticesDict[key]) < 6: continue            # if the cluster has less than 6 points ignore it
351            points = len(polygonVerticesDict[key]) - len(closureDict[key])    # number of points in the interior
352            if points < 2: continue                      # ignore clusters that don't have at least 2 points in the interior of the cluster
353            points += len(closureDict[key]) #* 0.8        # points on the edge only contribute a little less - punishment for very complex boundaries
354            if points < 5: continue                      # ignore too small clusters
355
356            # compute the center of the current cluster
357            xAve = sum([graph.objects[i][0] for i in polygonVerticesDict[key]]) / len(polygonVerticesDict[key])
358            yAve = sum([graph.objects[i][1] for i in polygonVerticesDict[key]]) / len(polygonVerticesDict[key])
359
360            # and compute the average distance of 3 nearest points that belong to different class to this center
361            diffClass = []
362            for v in allOtherClass:
363                if graph.objects[v].getclass() == graph.objects[i].getclass(): continue # ignore examples with the same class value
364                d = sqrt((graph.objects[v][0]-xAve)*(graph.objects[v][0]-xAve) + (graph.objects[v][1]-yAve)*(graph.objects[v][1]-yAve))
365                diffClass.append(d)
366            diffClass.sort()
367            dist = sum(diffClass[:5]) / float(len(diffClass[:5]))
368
369            """
370            # one way of computing the value
371            area = sqrt(sqrt(areaDict[key]))
372            if area > 0: value = points * dist / area
373            else: value = 0
374            """
375
376            # another way of computing value
377            #value = points * dist / aveDistDict[key]
378
379            if self.distributionScale:
380                d = orange.Distribution(graph.objects.domain.classVar, graph.objects)
381                v = d[graph.objects[polygonVerticesDict[key][0]].getclass()]
382                if v == 0: continue
383                points *= sum(d) / float(v)   # turn the number of points into a percentage of all points that belong to this class value and then multiply by the number of all data points in the data set
384
385            # and another
386            #dist = sqrt(dist*1000.0)/sqrt(aveDistDict[key]*1000.0)
387            dist = sqrt(dist*1000.0)
388            value = points
389            if self.considerDistance: value *= dist
390
391            valueDict[key] = value
392            #enlargedClosureDict[key] = enlargeClosure(graph, closureDict[key], aveDistDict[key])
393            enlargedClosureDict[key] = []
394
395            #otherDict[key] = (graph.objects[polygonVerticesDict[key][0]].getclass(), value, points, dist, area)
396            #otherDict[key] = (graph.objects[polygonVerticesDict[key][0]].getclass().value, value, points, dist, aveDistDict[key])
397            otherDict[key] = (int(graph.objects[polygonVerticesDict[key][0]].getclass()), value, points, dist, (xAve, yAve), aveDistDict[key])
398
399        del edgesDict, clusterDict, verticesDict, allOtherClass
400        for key in closureDict.keys():
401            if not otherDict.has_key(key):
402                if closureDict.has_key(key): closureDict.pop(key)
403                if polygonVerticesDict.has_key(key): polygonVerticesDict.pop(key)
404                if enlargedClosureDict.has_key(key): enlargedClosureDict.pop(key)
405        return graph, valueDict, closureDict, polygonVerticesDict, enlargedClosureDict, otherDict
406
407
408    # for each point in the data set compute how often does if appear inside a cluster
409    # for each point then return a float between 0 and 1
410    def evaluatePointsInClusters(self):
411        if self.clusterStabilityButton.isChecked():
412            self.pointStability = numpy.zeros(len(self.rawData), numpy.float)
413            self.pointStabilityCount = [0 for i in range(len(self.rawData.domain.classVar.values))]       # for each class value create a counter that will count the number of clusters for it
414           
415            (text, ok) = QInputDialog.getText('Projection count', 'How many of the best projections do you want to consider?')
416            if not ok: return
417            nrOfProjections = int(str(text))
418
419            considered = 0
420            for i in range(len(self.allResults)):
421                if considered > nrOfProjections: break
422                if type(self.allResults[i][CLASS]) != dict: continue    # ignore all projections except the ones that show all clusters in the picture
423                considered += 1
424                clusterClasses = [0 for j in range(len(self.rawData.domain.classVar.values))]
425                for key in self.allResults[i][VERTICES].keys():
426                    clusterClasses[self.allResults[i][CLASS][key]] = 1
427                    vertices = self.allResults[i][VERTICES][key]
428                    validData = self.graph.getValidList([self.graph.attributeNameIndex[self.allResults[i][ATTR_LIST][0]], self.graph.attributeNameIndex[self.allResults[i][ATTR_LIST][1]]])
429                    indices = numpy.compress(validData, numpy.array(range(len(self.rawData))), axis = 1)
430                    indicesToOriginalTable = numpy.take(indices, vertices)
431                    tempArray = numpy.zeros(len(self.rawData))
432                    numpy.put(tempArray, indicesToOriginalTable, numpy.ones(len(indicesToOriginalTable)))
433                    self.pointStability += tempArray
434                for j in range(len(clusterClasses)):        # some projections may contain more clusters of the same class. we make sure we don't process this wrong
435                    self.pointStabilityCount[j] += clusterClasses[j]
436       
437            for i in range(len(self.rawData)):
438                if self.pointStabilityCount[int(self.rawData[i].getclass())] != 0:
439                    self.pointStability[i] /= float(self.pointStabilityCount[int(self.rawData[i].getclass())])
440
441        #self.pointStability = [1.0 - val for val in self.pointStability]
442        if self.parentWidget: self.parentWidget.showSelectedCluster()
443
444
445    def updateShownProjections(self, *args):
446        self.resultList.clear()
447        self.shownResults = []
448        self.selectedClasses = self.getSelectedClassValues()
449
450        i = 0
451        while self.resultList.count() < self.resultListLen and i < len(self.allResults):
452            if self.attrLenDict[len(self.allResults[i][ATTR_LIST])] != 1: i+=1; continue
453            if type(self.allResults[i][CLASS]) == dict and -1 not in self.selectedClasses: i+=1; continue
454            if type(self.allResults[i][CLASS]) == int and self.allResults[i][CLASS] not in self.selectedClasses: i+=1; continue
455
456            string = ""
457            if self.showRank: string += str(i+1) + ". "
458            if self.showValue: string += "%.2f - " % (self.allResults[i][VALUE])
459
460            if self.allResults[i][STR_LIST] != "": string += self.allResults[i][STR_LIST]
461            else: string += self.buildAttrString(self.allResults[i][ATTR_LIST])
462
463            self.resultList.addItem(string)
464            self.shownResults.append(self.allResults[i])
465            i+=1
466        if self.resultList.count() > 0: self.resultList.setCurrentItem(self.resultList.item(0))
467
468
469    # save input dataset, get possible class values, ...
470    def setData(self, data, clearResults = 1):
471        if hasattr(data, "name"): self.datasetName = data.name
472        else: self.datasetName = ""
473        sameDomain = 0
474        if not clearResults and self.rawData and data and self.rawData.domain == data.domain: sameDomain = 1
475        self.rawData = data
476        self.clearArguments()
477        if not sameDomain: self.clearResults()
478
479        if not data or not (data.domain.classVar and data.domain.classVar.varType == orange.VarTypes.Discrete):
480            self.selectedClasses = []
481            self.classesList.clear()
482            self.classValueList.clear()
483            return
484
485        if not sameDomain:
486            self.classesList.clear()
487            self.classValueList.clear()
488            self.selectedClasses = []
489
490            # add class values
491            for i in range(len(data.domain.classVar.values)):
492                self.classesList.addItem(data.domain.classVar.values[i])
493                self.classValueList.addItem(data.domain.classVar.values[i])
494            self.classesList.addItem("All clusters")
495            self.classesList.selectAll()
496            if len(data.domain.classVar.values) > 0: self.classValueList.setCurrentIndex(0)
497
498    # save subsetdata. first example from this dataset can be used with argumentation - it can find arguments for classifying the example to the possible class values
499    def setSubsetData(self, subsetdata):
500        self.subsetdata = subsetdata
501        self.clearArguments()
502
503
504    # given a dataset return a list of (val, attrName) where val is attribute "importance" and attrName is name of the attribute
505    def getEvaluatedAttributes(self, data):
506        self.setStatusBarText("Evaluating attributes...")
507        attrs = orngVisFuncts.evaluateAttributes(data, contMeasures[self.attrCont][1], discMeasures[self.attrDisc][1])
508        self.setStatusBarText("")
509        return attrs
510
511    def addResult(self, value, closure, vertices, attrList, classValue, enlargedClosure, other, strList = ""):
512        self.insertItem(self.findTargetIndex(value), value, closure, vertices, attrList, classValue, enlargedClosure, other, strList)
513        qApp.processEvents()        # allow processing of other events
514
515    # use bisection to find correct index
516    def findTargetIndex(self, value):
517        top = 0; bottom = len(self.allResults)
518
519        while (bottom-top) > 1:
520            mid  = (bottom + top)/2
521            if max(value, self.allResults[mid][VALUE]) == value: bottom = mid
522            else: top = mid
523
524        if len(self.allResults) == 0: return 0
525        if max(value, self.allResults[top][VALUE]) == value:
526            return top
527        else:
528            return bottom
529
530    # insert new result - give parameters: value of the cluster, closure, list of attributes.
531    # parameter strList can be a pre-formated string containing attribute list (used by polyviz)
532    def insertItem(self, index, value, closure, vertices, attrList, classValue, enlargedClosure, other, strList = ""):
533        if index < self.maxResultListLen:
534            self.allResults.insert(index, (value, closure, vertices, attrList, classValue, enlargedClosure, other, strList))
535            if len(self.allResults) > self.maxResultListLen: self.allResults.pop()
536        if index < self.resultListLen:
537            string = ""
538            if self.showRank: string += str(index+1) + ". "
539            if self.showValue: string += "%.2f - " % (value)
540
541            if strList != "": string += strList
542            else: string += self.buildAttrString(attrList)
543
544            self.resultList.insertItem(index, string)
545            self.shownResults.insert(index, (value, closure, vertices, attrList, classValue, enlargedClosure, other, strList))
546
547        # remove worst projection if list is too long
548        if self.resultList.count() > self.resultListLen:
549            self.resultList.takeItem(self.resultList.count()-1)
550            self.shownResults.pop()
551
552    def finishedAddingResults(self):
553        self.cancelOptimization = 0
554
555        self.attrLenList.clear()
556        self.attrLenDict = {}
557        maxLen = -1
558        for i in range(len(self.shownResults)):
559            if len(self.shownResults[i][ATTR_LIST]) > maxLen:
560                maxLen = len(self.shownResults[i][ATTR_LIST])
561        if maxLen == -1: return
562        if maxLen == 2: vals = [2]
563        else: vals = range(3, maxLen+1)
564        for val in vals:
565            self.attrLenList.addItem(str(val))
566            self.attrLenDict[val] = 1
567        self.attrLenList.selectAll()
568        self.resultList.setCurrentItem(self.resultList.item(0))
569
570
571    # ##############################################################
572    # Loading and saving projection files
573    # ##############################################################
574
575    # save the list into a file - filename can be set if you want to call this function without showing the dialog
576    def save(self, filename = None):
577        if filename == None:
578            # get file name
579            if self.datasetName != "":
580                filename = "%s - %s" % (os.path.splitext(os.path.split(self.datasetName)[1])[0], self.parentName)
581            else:
582                filename = "%s" % (self.parentName)
583            qname = QFileDialog.getSaveFileName( self.lastSaveDirName + "/" + filename, "Interesting clusters (*.clu)", self, "", "Save Clusters")
584            if qname.isEmpty(): return
585            name = unicode(qname)
586        else:
587            name = filename
588
589        # take care of extension
590        if os.path.splitext(name)[1] != ".clu":
591            name = name + ".clu"
592
593        dirName, shortFileName = os.path.split(name)
594        self.lastSaveDirName = dirName
595
596        # open, write and save file
597        file = open(name, "wt")
598        attrs = ["resultListLen", "parentName"]
599        Dict = {}
600        for attr in attrs: Dict[attr] = self.__dict__[attr]
601        file.write("%s\n" % (str(Dict)))
602        file.write("%s\n" % str(self.selectedClasses))
603        file.write("%d\n" % self.rawData.checksum())
604        for (value, closure, vertices, attrList, classValue, enlargedClosure, other, strList) in self.shownResults:
605            if type(classValue) != dict: continue
606            s = "(%s, %s, %s, %s, %s, %s, %s, '%s')\n" % (str(value), str(closure), str(vertices), str(attrList), str(classValue), str(enlargedClosure), str(other), strList)
607            file.write(s)
608        file.flush()
609        file.close()
610
611
612    # load projections from a file
613    def load(self):
614        self.clearResults()
615        self.clearArguments()
616        if self.rawData == None:
617            QMessageBox.critical(None,'Load','There is no data. First load a data set and then load a cluster file',QMessageBox.Ok)
618            return
619
620        name = QFileDialog.getOpenFileName( self.lastSaveDirName, "Interesting clusters (*.clu)", self, "", "Open Clusters")
621        if name.isEmpty(): return
622        name = unicode(name)
623
624        dirName, shortFileName = os.path.split(name)
625        self.lastSaveDirName = dirName
626
627        file = open(name, "rt")
628        settings = eval(file.readline()[:-1])
629        if settings.has_key("parentName") and settings["parentName"] != self.parentName:
630            QMessageBox.critical( None, "Cluster Dialog", 'Unable to load cluster file. It was saved for %s method'%(settings["parentName"]), QMessageBox.Ok)
631            file.close()
632            return
633
634        self.setSettings(settings)
635
636        # find if it was computed for specific class values
637        selectedClasses = eval(file.readline()[:-1])
638        for i in range(len(self.rawData.domain.classVar.values)):
639            self.classesList.item(i).setSelected(i in selectedClasses)
640        if -1 in selectedClasses: self.classesList.item(self.classesList.count()-1).setSelected(1)
641        checksum = eval(file.readline()[:-1])
642        if self.rawData and self.rawData.checksum() != checksum:
643            cancel = QMessageBox.critical(None, "Load", "Currently loaded data set is different than the data set that was used for computing these projections. \nThere may be differences in the number of examples or in actual data values. \nThe shown clusters will therefore most likely show incorrect information. \nAre you sure you want to continue with loading?", 'Yes','No', '', 1,0)
644            if cancel: return
645
646        line = file.readline()[:-1];
647        while (line != ""):
648            (value, closure, vertices, attrList, classValue, enlargedClosure, other, strList) = eval(line)
649            self.addResult(value, closure, vertices, attrList, classValue, enlargedClosure, other, strList)
650            for key in classValue.keys():
651                self.addResult(other[key][1], closure[key], vertices[key], attrList, classValue[key], enlargedClosure[key], other[key], strList)
652            line = file.readline()[:-1]
653        file.close()
654
655        # update loaded results
656        self.finishedAddingResults()
657
658
659    # disable all controls while evaluating projections
660    def disableControls(self):
661        self.optimizationTypeCombo.setEnabled(0)
662        self.attributeCountCombo.setEnabled(0)
663        self.startOptimizationButton.hide()
664        self.stopOptimizationButton.show()
665        self.SettingsTab.setEnabled(0)
666        self.ManageTab.setEnabled(0)
667        self.ArgumentationTab.setEnabled(0)
668        self.ClassificationTab.setEnabled(0)
669
670    def enableControls(self):
671        self.optimizationTypeCombo.setEnabled(1)
672        self.attributeCountCombo.setEnabled(1)
673        self.startOptimizationButton.show()
674        self.stopOptimizationButton.hide()
675        self.SettingsTab.setEnabled(1)
676        self.ManageTab.setEnabled(1)
677        self.ArgumentationTab.setEnabled(1)
678        self.ClassificationTab.setEnabled(1)
679
680    # ##############################################################
681    # exporting multiple pictures
682    # ##############################################################
683    def exportMultipleGraphs(self):
684        (text, ok) = QInputDialog.getText('Graph count', 'How many of the best projections do you wish to save?')
685        if not ok: return
686        self.bestGraphsCount = int(str(text))
687
688        self.sizeDlg = OWDlgs.OWChooseImageSizeDlg(self.graph, parent=self)
689        self.sizeDlg.disconnect(self.sizeDlg.okButton, SIGNAL("clicked()"), self.sizeDlg.accept)
690        self.sizeDlg.connect(self.sizeDlg.okButton, SIGNAL("clicked()"), self.saveToFileAccept)
691        self.sizeDlg.exec_()
692
693    def saveToFileAccept(self):
694        fileName = unicode(QFileDialog.getSaveFileName("Graph","Portable Network Graphics (*.PNG);;Windows Bitmap (*.BMP);;Graphics Interchange Format (*.GIF)", None, "Save to...", "Save to..."))
695        if fileName == "": return
696        (fil,ext) = os.path.splitext(fileName)
697        ext = ext.replace(".","")
698        if ext == "":
699            ext = "PNG"     # if no format was specified, we choose png
700            fileName = fileName + ".png"
701        ext = ext.upper()
702
703        (fil, extension) = os.path.splitext(fileName)
704        size = self.sizeDlg.getSize()
705        for i in range(1, min(self.resultList.count(), self.bestGraphsCount+1)):
706            self.resultList.item(i-1).setSelected(1)
707            self.graph.replot()
708            name = fil + " (%02d)" % i + extension
709            self.sizeDlg.saveToFileDirect(name, ext, size)
710        QDialog.accept(self.sizeDlg)
711
712    """
713    def interactionAnalysis(self):
714        from OWkNNOptimization import OWInteractionAnalysis, CLUSTER_POINT
715        dialog = OWInteractionAnalysis(self, signalManager = self.signalManager)
716        dialog.setResults(self.shownResults, CLUSTER_POINT)
717        dialog.show()
718
719    def graphProjectionQuality(self):
720        from OWkNNOptimization import OWGraphProjectionQuality, CLUSTER_POINT
721        dialog = OWGraphProjectionQuality(self, signalManager = self.signalManager)
722        dialog.setResults(self.allResults, CLUSTER_POINT)
723        dialog.show()
724    """
725
726
727
728    # ######################################################
729    # Auxiliary functions
730    # ######################################################
731    def getOptimizationType(self):
732        return self.optimizationType
733
734    # from a list of attributes build a nice string with attribute names
735    def buildAttrString(self, attrList):
736        if len(attrList) == 0: return ""
737        strList = attrList[0]
738        for item in attrList[1:]:
739            strList = strList + ", " + item
740        return strList
741
742    def getAllResults(self):
743        return self.allResults
744
745    def getShownResults(self):
746        return self.shownResults
747
748    def getSelectedCluster(self):
749        if self.resultList.count() == 0: return None
750        return self.shownResults[self.resultList.currentItem()]
751
752
753    def stopOptimizationClick(self):
754        self.cancelOptimization = 1
755
756    def isOptimizationCanceled(self):
757        return self.cancelOptimization
758
759    def destroy(self, dw = 1, dsw = 1):
760        self.saveSettings()
761
762    def setStatusBarText(self, text):
763        self.statusBar.message(text)
764        qApp.processEvents()
765
766    # ######################################################
767    # Argumentation functions
768    # ######################################################
769
770    # use evaluated projections to find arguments for classifying the first example in the self.subsetdata to possible classes.
771    # add found arguments into the list
772    # find only the number of argument that is specified in Arugment count combo in Classification tab
773    def findArguments(self, selectBest = 1, showClassification = 1):
774        self.cancelArgumentation = 0
775        self.clearArguments()
776        self.arguments = [[] for i in range(self.classValueList.count())]
777
778        if self.subsetdata == None:
779            QMessageBox.information( None, "Cluster Dialog Argumentation", 'To find arguments you first have to provide a new example that you wish to classify. \nYou can do this by sending the example to the visualization widget through the "Example Subset" signal.', QMessageBox.Ok + QMessageBox.Default)
780            return
781        if len(self.shownResults) == 0:
782            QMessageBox.information( None, "Cluster Dialog Argumentation", 'To find arguments you first have to find clusters in some projections by clicking "Find arguments" in the Main tab.', QMessageBox.Ok + QMessageBox.Default)
783            return
784
785        example = self.subsetdata[0]    # we can find arguments only for one example. We select only the first example in the example table
786        testExample = [self.parentWidget.graph.scaleExampleValue(example, i) for i in range(len(example.domain.attributes))]
787
788        self.findArgumentsButton.hide()
789        self.stopArgumentationButton.show()
790
791        foundArguments = 0
792        argumentCount = self.argumentCounts[self.argumentCountIndex]
793        vals = [0.0 for i in range(len(self.arguments))]
794
795        # ####################################################################
796        # find arguments so that we evaluate best argumentCount clusters for each class value and see how often does the point lie inside them
797        # this way, we don't care if clusters of one class value have much higher values than clusters of another class.
798        # NOTE: argumentCount in this case represents the number of evaluated clusters and not the number of clusters where the point actually lies inside
799        if self.argumentationType == BEST_GROUPS_IN_EACH_CLASS:
800            classIndices = [0 for i in range(len(self.rawData.domain.classVar.values))]
801            canFinish = 0
802            while not canFinish:
803                for cls in range(len(classIndices)):
804                    startingIndex = classIndices[cls]
805
806                    for index in range(len(self.allResults) - startingIndex):
807                        (value, closure, vertices, attrList, classValue, enlargedClosure, other, strList) = self.allResults[startingIndex+index]
808                        if type(classValue) == dict: continue       # the projection contains several clusters
809                        if classValue != cls: continue
810                        classIndices[cls] = startingIndex + index + 1   # remember where to start next
811
812                        qApp.processEvents()
813                        inside = self.isExampleInsideCluster(attrList, testExample, closure, vertices, other[OTHER_AVERAGE_DIST])
814                        if inside == 0 or (inside == 1 and self.conditionForArgument == MUST_BE_INSIDE): continue
815                        vals[classValue] += 1
816
817                        self.arguments[classValue].append((None, value, attrList, startingIndex+index))
818                        if classValue == self.classValueList.currentItem():
819                            self.argumentList.addItem("%.2f - %s" %(value, attrList))
820                    if classIndices[cls] == startingIndex: classIndices[cls] = len(self.allResults)+1
821
822                foundArguments += 1
823                if min(classIndices) >= len(self.allResults): canFinish = 1  # out of possible arguments
824
825                if sum(vals) > 0 and foundArguments >= argumentCount:
826                    if not self.canUseMoreArguments or (max(vals)*100.0)/float(sum(vals)) > self.moreArgumentsNums[self.moreArgumentsIndex]:
827                        canFinish = 1
828        # ####################################################################
829        # we consider clusters with the highest values and check if the point lies inside them. When it does we say that this cluster is an argument for
830        # that class. We continue until we find argumentCount such arguments. Then we predict the most likely class value
831        else:
832            for index in range(len(self.allResults)):
833                if self.cancelArgumentation: break
834                # we also stop if we are not allowed to search for more than argumentCount arguments or we are allowed and we have a reliable prediction or we have used a 100 additional arguments
835                if foundArguments >= argumentCount and (not self.canUseMoreArguments or (max(vals)*100.0 / sum(vals) > self.moreArgumentsNums[self.moreArgumentsIndex]) or foundArguments >= argumentCount + 100): break
836
837                (value, closure, vertices, attrList, classValue, enlargedClosure, other, strList) = self.allResults[index]
838                if type(classValue) == dict: continue       # the projection contains several clusters
839
840                qApp.processEvents()
841                inside = self.isExampleInsideCluster(attrList, testExample, closure, vertices, other[OTHER_AVERAGE_DIST])
842                if self.conditionForArgument == MUST_BE_INSIDE and inside != 2: continue
843                elif inside == 0: continue
844
845                foundArguments += 1  # increase argument count
846
847                if self.useProjectionValue: vals[classValue] += value
848                else: vals[classValue] += 1
849
850                self.arguments[classValue].append((None, value, attrList, index))
851                if classValue == self.classValueList.currentItem():
852                    self.argumentList.addItem("%.2f - %s" %(value, attrList))
853
854        self.stopArgumentationButton.hide()
855        self.findArgumentsButton.show()
856        if self.argumentList.count() > 0 and selectBest: self.argumentList.setCurrentItem(self.argumentList.item(0))
857        if foundArguments == 0: return (None, None)
858
859        suma = sum(vals)
860        dist = orange.DiscDistribution([val/float(suma) for val in vals]);  dist.variable = self.rawData.domain.classVar
861        classValue = self.rawData.domain.classVar[vals.index(max(vals))]
862
863        # show classification results
864        s = '<nobr>Based on current classification settings, the example would be classified </nobr><br><nobr>to class <b>%(cls)s</b> with probability <b>%(prob).2f%%</b>.</nobr><br><nobr>Predicted class distribution is:</nobr><br>' % {"cls": classValue, "prob": dist[classValue]*100}
865        for key in dist.keys():
866            s += "<nobr>&nbsp &nbsp &nbsp &nbsp %s : %.2f%%</nobr><br>" % (key, dist[key]*100)
867        if foundArguments > argumentCount:
868            s += "<nobr>Note: To get the current prediction, <b>%(fa)d</b> arguments had to be used (instead of %(ac)d)<br>" % {"fa": foundArguments, "ac": argumentCount}
869        s = s[:-4]
870        if showClassification:
871            QMessageBox.information(None, "Classification results", s, QMessageBox.Ok + QMessageBox.Default)
872        return classValue, dist
873
874    # is the example described with scaledExampleVals inside closure that contains points in vertices in projection of attrList attributes
875    def isExampleInsideCluster(self, attrList, scaledExampleVals, closure, vertices, averageEdgeDistance):
876        testExampleAttrVals = [scaledExampleVals[self.graph.attributeNameIndex[attrList[i]]] for i in range(len(attrList))]
877        if min(testExampleAttrVals) < 0.0 or max(testExampleAttrVals) > 1.0: return 0
878
879        array = self.graph.createProjectionAsNumericArray([self.graph.attributeNameIndex[attr] for attr in attrList])
880        if array == None:
881            return 0
882        short = numpy.transpose(numpy.take(array, vertices))
883
884        [xTest, yTest] = self.graph.getProjectedPointPosition(attrList, testExampleAttrVals)
885
886        #if xTest < min(short[0]) or xTest > max(short[0]) or yTest < min(short[1]) or yTest > max(short[1]):
887        #    del array, short; return 0       # the point is definitely not inside the cluster
888
889        val = pointInsideCluster(array, closure, xTest, yTest)
890        if val:
891            del array, short
892            return 2
893
894        val = 0
895        points = union([i for (i,j) in closure], [j for (i,j) in closure])
896        for i in range(len(points)):
897            xdiff = array[points[i]][0] - xTest
898            ydiff = array[points[i]][1] - yTest
899            if sqrt(xdiff*xdiff + ydiff*ydiff) < averageEdgeDistance:
900                val = 1
901                break
902
903        return val
904
905
906    # use bisection to find correct index
907    def findArgumentTargetIndex(self, value, arguments):
908        top = 0; bottom = len(arguments)
909
910        while (bottom-top) > 1:
911            mid  = (bottom + top)/2
912            if max(value, arguments[mid][1]) == value: bottom = mid
913            else: top = mid
914
915        if len(arguments) == 0: return 0
916        if max(value, arguments[top][1]) == value:
917            return top
918        else:
919            return bottom
920
921    def stopArgumentationClick(self):
922        self.cancelArgumentation = 1
923
924    def argumentationClassChanged(self):
925        self.argumentList.clear()
926        if len(self.arguments) == 0: return
927        ind = self.classValueList.currentItem()
928        for i in range(len(self.arguments[ind])):
929            val = self.arguments[ind][i]
930            if val[0] != None:  self.argumentList.insertItem(val[0], "%.2f - %s" %(val[1], val[2]))
931            else:               self.argumentList.addItem("%.2f - %s" %(val[1], val[2]))
932
933    def argumentSelected(self):
934        ind = self.argumentList.currentItem()
935        classInd = self.classValueList.currentItem()
936        clusterClosure = (self.allResults[self.arguments[classInd][ind][3]][CLOSURE], self.allResults[self.arguments[classInd][ind][3]][ENL_CLOSURE], self.allResults[self.arguments[classInd][ind][3]][CLASS])
937        self.parentWidget.updateGraph(self.arguments[classInd][ind][2], clusterClosure = clusterClosure)
938
939
940class clusterClassifier(orange.Classifier):
941    def __init__(self, clusterOptimizationDlg, visualizationWidget, data, firstTime = 1):
942        self.clusterOptimizationDlg = clusterOptimizationDlg
943        self.visualizationWidget = visualizationWidget
944        self.data = data
945
946        results = clusterOptimizationDlg.getAllResults()
947        if firstTime and results != None and len(results) > 0:
948            computeProjections = QMessageBox.information(clusterOptimizationDlg, 'Cluster classifier', 'Do you want to classify examples based the projections that are currently in the projection list \n or do you want to compute new projections?','Current projections','Compute new projections', '', 0,1)
949            #computeProjections = 0
950        elif results != None and len(results) > 0:
951            computeProjections = 0
952        else: computeProjections = 1
953
954        self.visualizationWidget.setData(data, clearResults = computeProjections)
955        if computeProjections == 1:     # run cluster detection
956            self.evaluating = 1
957            t = QTimer(self.visualizationWidget)
958            self.visualizationWidget.connect(t, SIGNAL("timeout()"), self.stopEvaluation)
959            t.start(self.clusterOptimizationDlg.evaluationTimeNums[self.clusterOptimizationDlg.evaluationTimeIndex] * 60 * 1000, 1)
960            self.visualizationWidget.optimizeClusters()
961            t.stop()
962            self.evaluating = 0
963
964    # timer event that stops evaluation of clusters
965    def stopEvaluation(self):
966        if self.evaluating:
967            self.clusterOptimizationDlg.stopOptimizationClick()
968
969
970    # for a given example run argumentation and find out to which class it most often falls
971    def __call__(self, example, returnType):
972        testExample = [self.visualizationWidget.graph.scaleExampleValue(example, i) for i in range(len(example.domain.attributes))]
973
974        argumentCount = 0
975        argumentValues = []
976        allowedArguments = self.clusterOptimizationDlg.argumentCounts[self.clusterOptimizationDlg.argumentCountIndex]
977
978        classProjectionVals = [0 for i in range(len(self.clusterOptimizationDlg.rawData.domain.classVar.values))]
979        classIndices = [0 for i in range(len(self.clusterOptimizationDlg.rawData.domain.classVar.values))]
980        if self.clusterOptimizationDlg.argumentationType == BEST_GROUPS_IN_EACH_CLASS:
981            canFinish = 0
982            while not canFinish:
983                for cls in range(len(classIndices)):
984                    startingIndex = classIndices[cls]
985
986                    for index in range(len(self.clusterOptimizationDlg.allResults) - startingIndex):
987                        (value, closure, vertices, attrList, classValue, enlargedClosure, other, strList) = self.clusterOptimizationDlg.allResults[startingIndex+index]
988                        if type(classValue) == dict: continue       # the projection contains several clusters
989                        if classValue != cls: continue
990                        classIndices[cls] = startingIndex + index + 1   # remember where to start next
991
992                        qApp.processEvents()
993                        attrIndices = [self.visualizationWidget.graph.attributeNameIndex[attr] for attr in attrList]
994                        data = self.visualizationWidget.graph.createProjectionAsExampleTable(attrIndices, jitterSize = 0.001 * self.clusterOptimizationDlg.jitterDataBeforeTriangulation)
995                        graph, valueDict, closureDict, polygonVerticesDict, enlargedClosureDict, otherDict = self.clusterOptimizationDlg.evaluateClusters(data)
996                        for key in valueDict.keys():
997                            if classValue != otherDict[key][OTHER_CLASS]: continue
998                            inside = self.clusterOptimizationDlg.isExampleInsideCluster(attrList, testExample, closureDict[key], polygonVerticesDict[key], otherDict[key][OTHER_AVERAGE_DIST])
999                            if inside == 0 or (inside == 1 and self.clusterOptimizationDlg.conditionForArgument == MUST_BE_INSIDE): continue
1000                            classProjectionVals[classValue] += 1
1001                        break
1002                    if classIndices[cls] == startingIndex: classIndices[cls] = len(self.clusterOptimizationDlg.allResults)+1
1003
1004                argumentCount += 1
1005                if min(classIndices) >= len(self.clusterOptimizationDlg.allResults): canFinish = 1  # out of possible arguments
1006
1007                if sum(classProjectionVals) > 0 and argumentCount >= allowedArguments:
1008                    if not self.clusterOptimizationDlg.canUseMoreArguments or (max(classProjectionVals)*100.0)/float(sum(classProjectionVals)) > self.clusterOptimizationDlg.moreArgumentsNums[self.clusterOptimizationDlg.moreArgumentsIndex]:
1009                        canFinish = 1
1010
1011            if max(classProjectionVals) == 0:
1012                print "there are no arguments for this example in the current projection list."
1013                dist = orange.DiscDistribution([1/float(len(classProjectionVals)) for i in classProjectionVals]); dist.variable = self.clusterOptimizationDlg.rawData.domain.classVar
1014                return (self.clusterOptimizationDlg.rawData.domain.classVar[0], dist)
1015
1016            ind = classProjectionVals.index(max(classProjectionVals))
1017            s = sum(classProjectionVals)
1018            dist = orange.DiscDistribution([val/float(s) for val in classProjectionVals]);  dist.variable = self.clusterOptimizationDlg.rawData.domain.classVar
1019
1020            classValue = self.clusterOptimizationDlg.rawData.domain.classVar[ind]
1021            s = '<nobr>Based on current classification settings, the example would be classified </nobr><br><nobr>to class <b>%(cls)s</b> with probability <b>%(prob).2f%%</b>.</nobr><br><nobr>Predicted class distribution is:</nobr><br>' % {"cls": classValue, "prob": dist[classValue]*100}
1022            for key in dist.keys():
1023                s += "<nobr>&nbsp &nbsp &nbsp &nbsp %s : %.2f%%</nobr><br>" % (key, dist[key]*100)
1024            if argumentCount > allowedArguments:
1025                s += "<nobr>Note: To get the current prediction, <b>%(fa)d</b> arguments had to be used (instead of %(ac)d)<br>" % {"fa": argumentCount, "ac": allowedArguments}
1026            print s[:-4]
1027
1028            return (classValue, dist)
1029        else:
1030            for (value, closure, vertices, attrList, classValue, enlargedClosure, other, strList) in self.clusterOptimizationDlg.allResults:
1031                if type(classValue) != dict: continue       # the projection contains several clusters
1032                qApp.processEvents()
1033
1034                attrIndices = [self.visualizationWidget.graph.attributeNameIndex[attr] for attr in attrList]
1035                data = self.visualizationWidget.graph.createProjectionAsExampleTable(attrIndices, jitterSize = 0.001 * self.clusterOptimizationDlg.jitterDataBeforeTriangulation)
1036                graph, valueDict, closureDict, polygonVerticesDict, enlargedClosureDict, otherDict = self.clusterOptimizationDlg.evaluateClusters(data)
1037                evaluation = []
1038                for key in valueDict.keys():
1039                    evaluation.append((self.clusterOptimizationDlg.isExampleInsideCluster(attrList, testExample, closureDict[key], polygonVerticesDict[key], otherDict[key][OTHER_AVERAGE_DIST]), int(graph.objects[polygonVerticesDict[key][0]].getclass()), valueDict[key]))
1040
1041                evaluation.sort(); evaluation.reverse()
1042                if len(evaluation) == 0 or (len(evaluation) > 0 and evaluation[0][0] == 0): continue # if the point wasn't in any of the clusters or near them then continue
1043                elif len(evaluation) > 0 and evaluation[0][0] == 1 and self.clusterOptimizationDlg.conditionForArgument == MUST_BE_INSIDE: continue
1044                elif len(evaluation) == 1 or evaluation[0][0] != evaluation[1][0]:
1045                    argumentCount += 1
1046                    argumentValues.append((evaluation[0][2], evaluation[0][1]))
1047
1048                # find 10 more arguments than it is necessary - this is because with a different fold of data the clusters can be differently evaluated
1049                if argumentCount >= self.clusterOptimizationDlg.argumentCounts[self.clusterOptimizationDlg.argumentCountIndex]:
1050                    argumentValues.sort(); argumentValues.reverse()
1051                    vals = [0.0 for i in range(len(self.clusterOptimizationDlg.rawData.domain.classVar.values))]
1052                    neededArguments = self.clusterOptimizationDlg.argumentCounts[self.clusterOptimizationDlg.argumentCountIndex]
1053                    sufficient = 0; consideredArguments = 0
1054                    for (val, c) in argumentValues:
1055                        if self.clusterOptimizationDlg.useProjectionValue:  vals[c] += val
1056                        else:                                               vals[c] += 1
1057                        consideredArguments += 1
1058                        if consideredArguments >= neededArguments and (not self.clusterOptimizationDlg.canUseMoreArguments or (max(vals)*100.0 / sum(vals) > self.clusterOptimizationDlg.moreArgumentsNums[self.clusterOptimizationDlg.moreArgumentsIndex]) or consideredArguments >= neededArguments + 30):
1059                            sufficient = 1
1060                            break
1061
1062                    # if we got enough arguments to make a prediction then do it
1063                    if sufficient:
1064                        ind = vals.index(max(vals))
1065                        s = sum(vals)
1066                        dist = orange.DiscDistribution([val/float(s) for val in vals]);  dist.variable = self.clusterOptimizationDlg.rawData.domain.classVar
1067
1068                        classValue = self.clusterOptimizationDlg.rawData.domain.classVar[ind]
1069                        s = '<nobr>Based on current classification settings, the example would be classified </nobr><br><nobr>to class <b>%(cls)s</b> with probability <b>%(prob).2f%%</b>.</nobr><br><nobr>Predicted class distribution is:</nobr><br>' % {"cls": classValue, "prob": dist[classValue]*100}
1070                        for key in dist.keys():
1071                            s += "<nobr>&nbsp &nbsp &nbsp &nbsp %s : %.2f%%</nobr><br>" % (key, dist[key]*100)
1072                        if consideredArguments > neededArguments:
1073                            s += "<nobr>Note: To get the current prediction, <b>%(fa)d</b> arguments had to be used (instead of %(ac)d)<br>" % {"fa": consideredArguments, "ac": neededArguments}
1074                        print s[:-4]
1075
1076                        return (classValue, dist)
1077
1078            # if we ran out of projections before we could get a reliable prediction use what we have
1079            vals = [0.0 for i in range(len(self.clusterOptimizationDlg.rawData.domain.classVar.values))]
1080            argumentValues.sort(); argumentValues.reverse()
1081            for (val, c) in argumentValues:
1082                if self.clusterOptimizationDlg.useProjectionValue:  vals[c] += val
1083                else:                                               vals[c] += 1
1084
1085            if max(vals) == 0.0:
1086                print "there are no arguments for this example in the current projection list."
1087                dist = orange.DiscDistribution([1/float(len(vals)) for i in range(len(vals))]); dist.variable = self.clusterOptimizationDlg.rawData.domain.classVar
1088                return (self.clusterOptimizationDlg.rawData.domain.classVar[0], dist)
1089
1090            ind = vals.index(max(vals))
1091            s = sum(vals)
1092            dist = orange.DiscDistribution([val/float(s) for val in vals]);  dist.variable = self.clusterOptimizationDlg.rawData.domain.classVar
1093
1094            classValue = self.clusterOptimizationDlg.rawData.domain.classVar[ind]
1095            s = '<nobr>Based on current classification settings, the example would be classified </nobr><br><nobr>to class <b>%(cls)s</b> with probability <b>%(prob).2f%%</b>.</nobr><br><nobr>Predicted class distribution is:</nobr><br>' % {"cls": classValue, "prob": dist[classValue]*100}
1096            for key in dist.keys():
1097                s += "<nobr>&nbsp &nbsp &nbsp &nbsp %s : %.2f%%</nobr><br>" % (key, dist[key]*100)
1098            s += "<nobr>Note: There were not enough projections to get a reliable prediction<br>"
1099            print s[:-4]
1100            return (classValue, dist)
1101
1102
1103class clusterLearner(orange.Learner):
1104    def __init__(self, clusterOptimizationDlg, visualizationWidget):
1105        self.clusterOptimizationDlg = clusterOptimizationDlg
1106        self.visualizationWidget = visualizationWidget
1107        self.name = self.clusterOptimizationDlg.parentWidget.clusterClassifierName
1108        self.firstTime = 1
1109
1110    def __call__(self, examples, weightID = 0):
1111        classifier = clusterClassifier(self.clusterOptimizationDlg, self.visualizationWidget, examples, self.firstTime)
1112        self.firstTime = 0
1113        return classifier
1114
1115
1116# ############################################################################################################################################
1117# ############################################################################################################################################
1118
1119
1120# compute union of two lists
1121def union(list1, list2):
1122    union_dict = {}
1123    for e in list1: union_dict[e] = 1
1124    for e in list2: union_dict[e] = 1
1125    return union_dict.keys()
1126
1127# compute intersection of two lists
1128def intersection(list1, list2):
1129    int_dict = {}
1130    list1_dict = {}
1131    for e in list1:
1132        list1_dict[e] = 1
1133    for e in list2:
1134        if list1_dict.has_key(e): int_dict[e] = 1
1135    return int_dict.keys()
1136
1137# on which side of the line through (x1,y1), (x2,y2) lies the point (xTest, yTest)
1138def getPosition(x1, y1, x2, y2, xTest, yTest):
1139    if x1 == x2:
1140        if xTest > x1: return 1
1141        else: return -1
1142    k = (y2-y1)/float(x2-x1)
1143    c = y1 - k*x1
1144    if k*xTest - yTest > -c : return 1
1145    else: return -1
1146
1147# compute distances between all connected vertices and store it in the DISTANCE field
1148def computeDistances(graph):
1149    for (i,j) in graph.getEdges():
1150        xdiff = graph.objects[i][0] - graph.objects[j][0]
1151        ydiff = graph.objects[i][1] - graph.objects[j][1]
1152        graph[i,j][DISTANCE] = sqrt(xdiff*xdiff + ydiff*ydiff)
1153
1154
1155# Slightly deficient function to determine if the two lines p1, p2 and p2, p3 turn in counter clockwise direction
1156def ccw(p1, p2, p3):
1157    dx1 = p2[0] - p1[0]; dy1 = p2[1] - p1[1]
1158    dx2 = p3[0] - p2[0]; dy2 = p3[1] - p2[1]
1159    if(dy1*dx2 < dy2*dx1): return 1
1160    else: return 0
1161
1162
1163# do lines with (p1,p2) and (p3,p4) intersect?
1164def lineIntersect(p1, p2, p3, p4):
1165    return ((ccw(p1, p2, p3) != ccw(p1, p2, p4)) and (ccw(p3, p4, p1) != ccw(p3, p4, p2)))
1166
1167# compute distance between points (x1,y1), (x2, y2)
1168def computeDistance(x1, y1, x2, y2):
1169    return sqrt((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2))
1170
1171# remove edges to different class values from the graph. set VALUE field of such edges to newVal
1172def removeEdgesToDifferentClasses(graph, newVal):
1173    if newVal == None:
1174        for (i,j) in graph.getEdges():
1175            if graph.objects[i].getclass() != graph.objects[j].getclass(): graph[i,j] = None
1176    else:
1177        for (i,j) in graph.getEdges():
1178            if graph.objects[i].getclass() != graph.objects[j].getclass(): graph[i,j][VALUE] = newVal
1179
1180
1181# for a given dictionary that contains edges on the closure, return a dictionary with these points as keys and a list of points with a different class value as the value
1182# this is used when we have to compute the nearest points that belong to a different class
1183def getPointsWithDifferentClassValue(graph, closureDict):
1184    differentClassPoinsDict = {}
1185    for key in closureDict:
1186        vertices = union([i for (i,j) in closureDict[key]], [j for (i,j) in closureDict[key]])
1187        points = []
1188        for v in vertices:
1189            for n in graph.getNeighbours(v):
1190                if graph.objects[v].getclass() == graph.objects[n].getclass() or n in points: continue
1191                points.append(n)
1192        differentClassPoinsDict[key] = points
1193    return differentClassPoinsDict
1194
1195
1196# remove edges, that do not connect to triangles
1197def removeSingleLines(graph, edgesDict, clusterDict, verticesDict, minimumValidIndex, newVal):
1198    for (i,j) in graph.getEdges():
1199        if graph[i,j][VALUE] < minimumValidIndex: continue
1200        merged = graph.getNeighbours(i) + graph.getNeighbours(j)
1201        merged.sort()
1202        k=0; found = 0;
1203        for k in range(len(merged)-1):
1204            if merged[k] == merged[k+1] and graph[i, merged[k]][VALUE] >= minimumValidIndex and graph[j, merged[k]][VALUE] >= minimumValidIndex:
1205                found = 1; break
1206        if not found:
1207            if newVal == None: graph[i,j] = None            # delete the edge
1208            else:
1209                graph[i,j][VALUE] = newVal   # set the edge value
1210                graph[i,j][CLUSTER] = -1
1211            # remove the edge from the edgesDict
1212            if edgesDict and clusterDict and verticesDict:
1213                index = max(clusterDict[j], clusterDict[i])
1214                if index == -1: continue
1215                if (i,j) in edgesDict[index]: edgesDict[index].remove((i,j))
1216                elif (j,i) in edgesDict[index]: edgesDict[index].remove((j,i))
1217                if clusterDict[i] != -1: verticesDict[clusterDict[i]].remove(i)
1218                if clusterDict[j] != -1: verticesDict[clusterDict[j]].remove(j)
1219                clusterDict[j] = clusterDict[i] = -1
1220
1221# remove single lines and triangles that are now treated as their own clusters because they are probably insignificant
1222def removeSingleTrianglesAndLines(graph, edgesDict, clusterDict, verticesDict, newVal):
1223    for key in edgesDict.keys():
1224        if len(verticesDict[key]) < 4:
1225            for (i,j) in edgesDict[key]:
1226                graph[i,j][VALUE] = newVal
1227                graph[i,j][CLUSTER] = -1
1228                clusterDict[i] = -1; clusterDict[j] = -1;
1229            verticesDict.pop(key)
1230            edgesDict.pop(key)
1231
1232# make clusters smaller by removing points on the hull that lie closer to points in opposite class
1233def removeDistantPointsFromClusters(graph, edgesDict, clusterDict, verticesDict, closureDict, newVal):
1234    for key in closureDict.keys():
1235        edges = closureDict[key]
1236        vertices = union([i for (i,j) in edges], [j for (i,j) in edges])
1237
1238        for vertex in vertices:
1239            correctClass = int(graph.objects[vertex].getclass())
1240            correct = []; incorrect = []
1241            for n in graph.getNeighbours(vertex):
1242                if int(graph.objects[n].getclass()) == correctClass: correct.append(graph[vertex,n][DISTANCE])
1243                else: incorrect.append(graph[vertex,n][DISTANCE])
1244
1245            # if the distance to the correct class value is greater than to the incorrect class -> remove the point from the
1246            if correct == [] or (len(incorrect) > 0 and min(correct) > min(incorrect)):
1247                # if closure will degenerate into a line -> remove the remaining vertices
1248                correctClassNeighbors = []
1249                for n in graph.getNeighbours(vertex):
1250                    if int(graph.objects[n].getclass()) == correctClass and graph[vertex,n][CLUSTER] != -1: correctClassNeighbors.append(n)
1251                for i in correctClassNeighbors:
1252                    for j in correctClassNeighbors:
1253                        if i==j: continue
1254                        if (i,j) in edges:
1255                            graph[i,j][CLUSTER] = -1
1256                            if newVal == None:  graph[i,j] = None
1257                            else:               graph[i,j][VALUE] = newVal
1258                            if i in verticesDict[key]: verticesDict[key].remove(i)
1259                            if j in verticesDict[key]: verticesDict[key].remove(j)
1260                            clusterDict[i] = -1; clusterDict[j] = -1
1261                            if (i,j) in edgesDict[key]: edgesDict[key].remove((i,j))
1262                            else: edgesDict[key].remove((j,i))
1263                            edges.remove((i,j))
1264
1265                # remove the vertex from the closure
1266                for n in graph.getNeighbours(vertex):
1267                    if int(graph.objects[n].getclass()) != correctClass or graph[vertex,n][CLUSTER] == -1: continue
1268                    if newVal == None:  graph[vertex,n] = None
1269                    else:               graph[vertex,n][VALUE] = newVal
1270                    if (n, vertex) in edgesDict[graph[vertex,n][CLUSTER]]: edgesDict[graph[vertex,n][CLUSTER]].remove((n,vertex))
1271                    elif (vertex, n) in edgesDict[graph[vertex,n][CLUSTER]]: edgesDict[graph[vertex,n][CLUSTER]].remove((vertex, n))
1272                    graph[vertex,n][CLUSTER] = -1
1273                if clusterDict[vertex] != -1: verticesDict[key].remove(vertex)
1274                #verticesDict[clusterDict[vertex]].remove(vertex)
1275                clusterDict[vertex] = -1
1276
1277
1278# mark edges on the closure with the outerEdgeValue and edges inside a cluster with a innerEdgeValue
1279# algorithm: for each edge, find all triangles that contain this edge. if the number of triangles is 1, then this edge is on the closure.
1280# if there is a larger number of triangles, then they must all lie on the same side of this edge, otherwise this edge is not on the closure
1281def computeClosure(graph, edgesDict, minimumValidIndex, innerEdgeValue, outerEdgeValue, differentClassValue, tooDistantValue):
1282    closureDict = {}
1283    for key in edgesDict.keys(): closureDict[key] = []  # create dictionary where each cluster will contain all edges that lie on the closure
1284    for (i,j) in graph.getEdges():
1285        if graph[i,j][VALUE] < minimumValidIndex or graph[i,j][CLUSTER] == -1: continue
1286        merged = intersection(graph.getNeighbours(i), graph.getNeighbours(j))
1287        sameClassPoints = []; otherClassPoints = []
1288        for v in merged:
1289            if graph[i, v][VALUE] >= minimumValidIndex and graph[j, v][VALUE] >= minimumValidIndex: sameClassPoints.append(v)
1290            elif graph[i, v][VALUE] == graph[j, v][VALUE] == differentClassValue: otherClassPoints.append(v)
1291            elif graph[i,v][VALUE] == tooDistantValue or graph[j,v][VALUE] == tooDistantValue:
1292                for n in graph.getNeighbours(v):
1293                    if graph[v,n][VALUE] == differentClassValue and n not in otherClassPoints: otherClassPoints.append(n)
1294        outer = 1; outer2 = 0
1295        if sameClassPoints == []: outer = 0
1296        elif len(sameClassPoints) > 0:
1297            dir = getPosition(graph.objects[i][0].value, graph.objects[i][1].value, graph.objects[j][0].value, graph.objects[j][1].value, graph.objects[sameClassPoints[0]][0].value, graph.objects[sameClassPoints[0]][1].value)
1298            for val in sameClassPoints[1:]:
1299                dir2 = getPosition(graph.objects[i][0].value, graph.objects[i][1].value, graph.objects[j][0].value, graph.objects[j][1].value, graph.objects[val][0].value, graph.objects[val][1].value)
1300                if dir != dir2: outer = 0; break
1301            if otherClassPoints != []:   # test if a point from a different class is lying inside one of the triangles
1302                for o in otherClassPoints:
1303                    val = 0; nearerPoint = 0
1304                    for s in sameClassPoints:
1305                        # check if o is inside (i,j,s) triangle
1306                        val += pointInsideCluster (graph.objects, [(i,j), (j,s), (s,i)], graph.objects[o][0].value, graph.objects[o][1].value)
1307                        # check if another point from sameClassPoints is inside (i,j,o) triangle - if it is, then (i,j) definitely isn't on the closure
1308                        nearerPoint += pointInsideCluster(graph.objects, [(i,j), (j,o), (o,i)], graph.objects[s][0].value, graph.objects[s][1].value)
1309                    if val > 0 and nearerPoint == 0: outer2 = 1
1310        if outer + outer2 == 1:      # if outer is 0 or more than 1 than it is not an outer edge
1311            graph[i,j][VALUE] = outerEdgeValue
1312            closureDict[graph[i,j][CLUSTER]].append((i,j))
1313        else:
1314            graph[i,j][VALUE] = innerEdgeValue
1315    return closureDict
1316
1317# produce a dictionary with edges, clusters and vertices. clusters are enumerated from 1 up. Each cluster has a different value.
1318# minimumValidValue - edges that have smaller value than this will be ignored and will represent boundary between different clusters
1319def enumerateClusters(graph, minimumValidValue):
1320    clusterIndex = 1
1321    verticesDict = {}
1322    clusterDict = {}
1323    edgesDict = {}
1324    for (i,j) in graph.getEdges(): graph[i,j][CLUSTER] = -1   # initialize class cluster
1325    for i in range(graph.nVertices): clusterDict[i] = -1
1326
1327    for i in range(graph.nVertices):
1328        if graph.getNeighbours(i) == [] or clusterDict[i] != -1: continue
1329        edgesDict[clusterIndex] = []
1330        verticesDict[clusterIndex] = []
1331        verticesToSearch = [i]
1332        while verticesToSearch != []:
1333            current = verticesToSearch.pop()
1334            if clusterDict[current] != -1: continue
1335            clusterDict[current] = clusterIndex
1336            verticesDict[clusterIndex].append(current)
1337            for n in graph.getNeighbours(current):
1338                if graph[current,n][VALUE] < minimumValidValue: continue
1339                if clusterDict[n] == -1:
1340                    verticesToSearch.append(n)
1341                    edgesDict[clusterIndex].append((n, current))
1342                    graph[current, n][CLUSTER] = clusterIndex
1343        clusterIndex += 1
1344    return (edgesDict, clusterDict, verticesDict, clusterIndex-1)
1345
1346# for edges that have too small number of vertices set edge value to insignificantEdgeValue
1347# for edges that fall apart because of the distance between points set value to removedEdgeValue
1348def computeAlphaShapes(graph, edgesDict, alphaShapesValue, insignificantEdgeValue, removedEdgeValue):
1349    for key in edgesDict.keys():
1350        edges = edgesDict[key]
1351        if len(edges) < 5:
1352            for (i,j) in edges: graph[i,j][VALUE] = insignificantEdgeValue
1353            continue
1354
1355        # remove edges that are of lenghts average + alphaShapesValue * standard deviation
1356        lengths = [graph[i,j][DISTANCE] for (i,j) in edges]
1357        ave = sum(lengths) / len(lengths)
1358        std = sqrt(sum([(x-ave)*(x-ave) for x in lengths])/len(lengths))
1359        allowedDistance = ave + alphaShapesValue * std
1360        for index in range(len(lengths)):
1361            if lengths[index] > allowedDistance: graph[edges[index][0], edges[index][1]][VALUE] = removedEdgeValue
1362
1363
1364# alphashapes removed some edges. if after clustering two poins of the edge still belong to the same cluster, restore the deleted edges
1365def fixDeletedEdges(graph, edgesDict, clusterDict, deletedEdgeValue, repairValue, deleteValue):
1366    for (i,j) in graph.getEdges():
1367        if graph[i,j][VALUE] == deletedEdgeValue:
1368            if clusterDict[i] == clusterDict[j]:           # points still belong to the same cluster. restore the values
1369                graph[i,j][VALUE] = repairValue            # restore the value of the edge
1370                graph[i,j][CLUSTER] = clusterDict[i]       # reset the edge value
1371                edgesDict[clusterDict[i]].append((i,j))    # re-add the edge to the list of edges
1372            else:       # points belong to a different clusters. delete the values
1373                if deleteValue == None: graph[i,j] = None
1374                else:                   graph[i,j][VALUE] = deleteValue
1375
1376
1377# compute the area of the polygons that are not necessarily convex
1378# can be used to measure how good one cluster is. Currently is is not used
1379def computeAreas(graph, edgesDict, clusterDict, verticesDict, closureDict, outerEdgeValue):
1380    areaDict = {}
1381
1382    for key in closureDict.keys():
1383        # first check if the closure is really a closure, by checking if it is connected into a circle
1384        # if it is not, remove this closure from all dictionaries
1385        vertices = [i for (i,j) in closureDict[key]] + [j for (i,j) in closureDict[key]]
1386        vertices.sort()
1387        valid = 1
1388        if len(vertices) % 2 != 0: valid = 0
1389        else:
1390            for i in range(len(vertices)/2):
1391                if vertices[2*i] != vertices[2*i+1]: valid = 0; break
1392        if not valid:
1393            print "found and ignored an invalid group", graph.objects.domain[0].name, graph.objects.domain[1].name, closureDict[key]
1394            areaDict[key] = 0
1395            continue
1396
1397        # then compute its area size
1398        currArea = 0.0
1399        edges = closureDict[key] # select outer edges
1400        coveredEdges = []
1401        while len(coveredEdges) < len(edges):
1402            for (e1, e2) in edges:
1403                if (e1, e2) not in coveredEdges and (e2, e1) not in coveredEdges: break
1404            polygons = computePolygons(graph, (e1, e2), outerEdgeValue)
1405            for poly in polygons:
1406                currArea += computeAreaOfPolygon(graph, poly)       # TO DO: currently this is incorrect. we should first check if the polygon poly lies inside one of the other polygons. if true, than the area should be subtracted
1407                for i in range(len(poly)-2): coveredEdges.append((poly[i], poly[i+1]))
1408                coveredEdges.append((poly[0], poly[-1]))
1409        areaDict[key] = currArea
1410    return areaDict
1411
1412# used when computing area of polygon
1413# for a given graph and starting edge return a set of polygons that represent the boundaries. in the simples case there will be only one polygon.
1414# However there can be also subpolygons and connected polygons
1415# this method is unable to guaranty correct computation of polygons in case there is an ambiguity - such as in case of 3 connected triangles where each shares two vertices with the other two triangles.
1416def computePolygons(graph, startingEdge, outerEdgeValue):
1417    waitingEdges = [startingEdge]
1418    takenEdges = []
1419    polygons = []
1420    while waitingEdges != []:
1421        (start, end) = waitingEdges.pop()
1422        if (start, end) in takenEdges or (end,start) in takenEdges: continue
1423        subpath = [start]
1424        while end != subpath[0]:
1425            neighbors = graph.getNeighbours(end)
1426            outerEdges = []
1427            for n in neighbors:
1428                if graph[end, n][VALUE] == outerEdgeValue and n != start and (n, end) not in takenEdges and (end, n) not in takenEdges:
1429                    outerEdges.append(n)
1430            if end in subpath:
1431                i = subpath.index(end)
1432                polygons.append(subpath[i:])
1433                subpath = subpath[:i]
1434            if outerEdges == []:
1435                print "there is a bug in computePolygons function", graph.objects.domain, startingEdge, end, neighbors
1436                break
1437            subpath.append(end)
1438            takenEdges.append((start, end))
1439            start = end; end = outerEdges[0]
1440            outerEdges.remove(end)
1441            if len(outerEdges) > 0:
1442                for e in outerEdges:
1443                    if (start, e) not in waitingEdges and (e, start) not in waitingEdges: waitingEdges.append((start, e))
1444        takenEdges.append((subpath[-1], subpath[0]))
1445        polygons.append(subpath)
1446    return polygons
1447
1448# fast computation of the area of the polygon
1449# formula can be found at http://en.wikipedia.org/wiki/Polygon
1450def computeAreaOfPolygon(graph, polygon):
1451    polygon = polygon + [polygon[0]]
1452    tempArea = 0.0
1453    for i in range(len(polygon)-1):
1454        tempArea += graph.objects[polygon[i]][0]*graph.objects[polygon[i+1]][1] - graph.objects[polygon[i+1]][0]*graph.objects[polygon[i]][1]
1455    return abs(tempArea)
1456
1457# does the point lie inside or on the edge of a cluster described with edges in closure
1458# algorithm from computational geometry in c (Jure Zabkar)
1459def pointInsideCluster(data, closure, xTest, yTest):
1460    # test if the point is the same as one of the points in the closure
1461    for (p1, p2) in closure:
1462        if data[p1][0] == xTest and data[p1][1] == yTest: return 1
1463        if data[p2][0] == xTest and data[p2][1] == yTest: return 1
1464
1465    count = 0
1466    for (p2, p3) in closure:
1467        x1 = data[p2][0] - xTest; y1 = data[p2][1] - yTest
1468        x2 = data[p3][0] - xTest; y2 = data[p3][1] - yTest
1469        if (y1 > 0 and y2 <= 0) or (y2 > 0 and y1 <= 0):
1470            x = (x1 * y2 - x2 * y1) / (y2 - y1)
1471            if x > 0: count += 1
1472    return count % 2
1473
1474
1475# compute average distance between points inside each cluster
1476def computeAverageDistance(graph, edgesDict):
1477    ave_distDict = {}
1478    for key in edgesDict.keys():
1479        distArray = []
1480        for (v1,v2) in edgesDict[key]:
1481            d = sqrt((graph.objects[v1][0]-graph.objects[v2][0])*(graph.objects[v1][0]-graph.objects[v2][0]) + (graph.objects[v1][1]-graph.objects[v2][1])*(graph.objects[v1][1]-graph.objects[v2][1]))
1482            distArray.append(d)
1483        ave_distDict[key] = sum(distArray) / float(max(1, len(distArray)))
1484    return ave_distDict
1485
1486
1487# ############################################################################################################################################
1488# #############################      FUNCTIONS FOR COMPUTING ENLARGED CLOSURE     ############################################################
1489# ############################################################################################################################################
1490
1491# return a list of vertices that are connected to vertex and have an edge value value
1492def getNeighboursWithValue(graph, vertex, value):
1493    ret = []
1494    for n in graph.getNeighbours(vertex):
1495        if graph[vertex, n][VALUE] == value: ret.append(n)
1496    return ret
1497
1498# for neighbors of v find which left and right vertices will represent the boundary of the enlarged closure
1499# this is only called when a vertex has more than 2 edges that we know that lie on the closure. in this case we have to figure out which pairs of vertices in ns belong together
1500def getNextPoints(graph, v, ns, closure):
1501    ret = []
1502    status = {}
1503    while ns != []:
1504        e = ns.pop()
1505        x_e = 0.999*graph.objects[v][0] + 0.001*graph.objects[e][0]; y_e = 0.999*graph.objects[v][1] + 0.001*graph.objects[e][1]
1506        for e2 in ns:
1507            x_e2 = 0.999*graph.objects[v][0] + 0.001*graph.objects[e2][0]; y_e2 = 0.999*graph.objects[v][1] + 0.001*graph.objects[e2][1]
1508            intersect = 0
1509            for test in ns:
1510                if test == e2: continue
1511                intersect += lineIntersect((x_e, y_e), (x_e2, y_e2), (graph.objects[v][0], graph.objects[v][1]), (graph.objects[test][0], graph.objects[test][1]))
1512            status[(e,e2)] = intersect
1513
1514    for (e,e2) in status.keys():
1515        if not status.has_key((e,e2)): continue
1516        if status[(e,e2)] == 0:
1517            ret.append((e,e2))
1518            for (f, f2) in status.keys():
1519                if f == e or f == e2 or f2 == e or f2 == e2:
1520                    status.pop((f,f2))
1521
1522    for (e,e2) in status.keys():
1523        if not status.has_key((e,e2)): continue
1524        ret.append((e,e2))
1525        for (f, f2) in status.keys():
1526            if f == e or f == e2 or f2 == e or f2 == e2:
1527                status.pop((f,f2))
1528    return ret
1529
1530# add val to the dictionary under the key key
1531def addPointsToDict(dictionary, key, val):
1532    if dictionary.has_key(key): dictionary[key] = dictionary[key] + [val]
1533    else: dictionary[key] = [val]
1534
1535# return a list that possibly contains multiple lists. in each list there are vertices sorted in the way as they are ordered on the closure
1536def getSortedClosurePoints(graph, closure):
1537    dicts = []
1538    tempDicts = []
1539    tempD = {}
1540
1541    pointDict = {}
1542    points = union([i for (i,j) in closure], [j for (i,j) in closure])
1543    for p in points:
1544        ns = getNeighboursWithValue(graph, p, 2)
1545        if len(ns) > 2:
1546            split = getNextPoints(graph, p, ns, closure)       # splited is made of [(l1, r1), (l2, r2), ...]
1547        else: split = [(ns[0], ns[1])]
1548        pointDict[p] = split
1549
1550    lists = []
1551    while pointDict.keys() != []:
1552        for start in pointDict.keys():
1553            if len(pointDict[start]) == 1:
1554                currList = [start, pointDict[start][0][0]]
1555                last = pointDict[start][0][1]
1556                pointDict.pop(start)
1557                break
1558        lastAdded = currList[-1]
1559        while lastAdded != start:
1560            for (l,r) in pointDict[lastAdded]:
1561                if l == currList[-2]:
1562                    currList.append(r)
1563                    pointDict[lastAdded].remove((l,r))
1564                    if pointDict[lastAdded] == []: pointDict.pop(lastAdded)
1565                    lastAdded = r
1566                    break
1567                elif r == currList[-2]:
1568                    currList.append(l)
1569                    pointDict[lastAdded].remove((l,r))
1570                    if pointDict[lastAdded] == []: pointDict.pop(lastAdded)
1571                    lastAdded = l
1572                    break
1573        lists.append(currList[:-1])
1574    return lists
1575
1576# we are trying to blown a cluster. we therefore compute the new position of point point that lies outside the current cluster
1577def getEnlargedPointPosition(graph, point, left, right, dist, closure):
1578    y_diff1 = graph.objects[point][0] - graph.objects[left][0]
1579    x_diff1 = - (graph.objects[point][1] - graph.objects[left][1])
1580    d = sqrt(x_diff1*x_diff1 + y_diff1*y_diff1)
1581    x_diff1 /= d; y_diff1 /= d
1582
1583    y_diff2 = graph.objects[right][0] - graph.objects[point][0]
1584    x_diff2 = - (graph.objects[right][1] - graph.objects[point][1])
1585    d = sqrt(x_diff2*x_diff2 + y_diff2*y_diff2)
1586    x_diff2 /= d; y_diff2 /= d
1587
1588    x_diff = x_diff1 + x_diff2; y_diff = y_diff1 + y_diff2
1589    d = sqrt(x_diff*x_diff + y_diff*y_diff)
1590    x_diff /= d; y_diff /= d
1591    x_diff *= dist; y_diff *= dist
1592
1593    x = graph.objects[point][0] + x_diff*0.001
1594    y = graph.objects[point][1] + y_diff*0.001
1595    if pointInsideCluster(graph.objects, closure, 0.99*graph.objects[left][0] + 0.01*x, 0.99*graph.objects[left][1] + 0.01*y):
1596        return (graph.objects[point][0] - x_diff, graph.objects[point][1] - y_diff)
1597    else:
1598        return (graph.objects[point][0] + x_diff, graph.objects[point][1] + y_diff)
1599
1600
1601# this function computes a blown cluster. The cluster is enlarged for a half of average distance between edges inside the cluster
1602def enlargeClosure(graph, closure, aveDist):
1603    halfAveDist = aveDist / 2.0
1604    sortedClosurePoints = getSortedClosurePoints(graph, closure)
1605    merged = []
1606    for group in sortedClosurePoints:
1607        currMerged = []
1608        for i in range(len(group)):
1609            p = group[i]
1610            l = group[i-1]; r = group[(i+1)%len(group)]
1611            x, y = getEnlargedPointPosition(graph, p, l, r, halfAveDist, closure)
1612            currMerged.append((x,y))
1613        merged.append(currMerged)
1614    return merged
1615
1616
1617
1618#test widget appearance
1619if __name__=="__main__":
1620    import sys
1621    a = QApplication(sys.argv)
1622    ow = ClusterOptimization()
1623    a.setMainWidget(ow)
1624    ow.show()
1625    a.exec_()
Note: See TracBrowser for help on using the repository browser.