Changeset 3492:87d3a516c1c6 in orange
 Timestamp:
 04/04/07 13:37:53 (7 years ago)
 Branch:
 default
 Convert:
 918afbf622bc8f1dcbc8dc3a34d5c5177a4006a1
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/orngVizRank.py
r3413 r3492 22 22 23 23 # results in the list 24 ACCURACY = 0 24 ACCURACY = 0 25 25 OTHER_RESULTS = 1 26 26 LEN_TABLE = 2 … … 106 106 self.data = None 107 107 self.subsetData = None 108 108 109 109 self.results = [] 110 110 self.arguments = [] # a list of arguments 111 111 112 112 self.kValue = 10 113 113 self.percentDataUsed = 100 … … 122 122 self.useSupervisedPCA = 0 # use the supervisedPCA 123 123 self.useExampleWeighting = 0 # weight examples, so that the class that has a low number of examples will have higher weights 124 124 125 125 self.externalLearner = None # do we use knn or some external learner 126 126 self.selectedClasses = [] # which classes are we trying to separate … … 140 140 self.optimizeProjectionLimit = 0 141 141 self.optimizedProjectionsCount = 0 142 142 143 143 if visualizationMethod == SCATTERPLOT: self.parentName = "Scatterplot" 144 144 elif visualizationMethod == RADVIZ: self.parentName = "Radviz" 145 145 elif visualizationMethod == LINEAR_PROJECTION: self.parentName = "Linear Projection" 146 146 elif visualizationMethod == POLYVIZ: self.parentName = "Polyviz" 147 148 self.argumentCount = 1 # number of arguments used when classifying 147 148 self.argumentCount = 1 # number of arguments used when classifying 149 149 #self.argumentValueFormula = 1 # how to compute argument value 150 150 … … 152 152 self.locOptAllowAddingAttributes = 0 # do we allow increasing the number of visualized attributes 153 153 self.locOptMaxAttrsInProj = 20 # if self.locOptAllowAddingAttributes == 1 then what is the maximum number of attributes in a projection 154 self.locOptAttrsToTry = 50 # number of best ranked attributes to try 154 self.locOptAttrsToTry = 50 # number of best ranked attributes to try 155 155 self.locOptProjCount = 20 # try to locally optimize this number of best ranked projections 156 156 self.attributeNameIndex = {} # dict with indices to attributes … … 158 158 self.rankArgumentsByStrength = 0 # how do you want to compute arguments. if 0 then we go through the top ranked projection and classify. If 1 we rerank projections to projections with strong class prediction and use them for classification 159 159 self.storeEachPermutation = 0 # do we want to save information for each fold when evaluating projection  used to compute VizRank's accuracy 160 160 161 161 self.datasetName = "" 162 162 163 163 # 0  set to sqrt(N) 164 164 # 1  set to N / c … … 206 206 else: 207 207 kValue = self.getkValue(kValueFormula) 208 208 209 209 if self.percentDataUsed != 100: 210 210 kValue = int(kValue * self.percentDataUsed / 100.0) 211 211 else: 212 212 kValue = k 213 213 214 214 return orange.kNNLearner(k = kValue, rankWeight = 0, distanceConstructor = orange.ExamplesDistanceConstructor_Euclidean(normalize=0)) 215 215 216 216 217 217 def setData(self, data): … … 225 225 if not hasDiscreteClass: 226 226 return 227 227 228 228 self.selectedClasses = range(len(self.data.domain.classVar.values)) 229 229 230 230 if self.autoSetTheKValue: 231 231 if self.kValueFormula == 0 or not data.domain.classVar or data.domain.classVar.varType == orange.VarTypes.Continuous: … … 242 242 self.graph.setSubsetData(subData) 243 243 self.clearArguments() 244 244 245 245 def getEvaluatedAttributes(self): 246 246 return orngVisFuncts.evaluateAttributes(self.data, contMeasures[self.attrCont][1], discMeasures[self.attrDisc][1]) … … 267 267 if funct(accuracy, self.results[top][ACCURACY]) == accuracy: 268 268 return top 269 else: 269 else: 270 270 return bottom 271 271 … … 319 319 folds[fold].sort(); folds[fold].reverse() 320 320 self.results = [self.results[index] for (val, index) in folds[fold][:self.argumentCount+100]] 321 321 322 322 for i in range(len(data)): 323 323 if indices[i] != fold: continue … … 367 367 if testTable.domain.classVar.varType == orange.VarTypes.Discrete: 368 368 return self.computeAccuracyFromResults(testTable, results) 369 369 370 370 # for continuous class we can't compute brier score and classification accuracy 371 371 else: … … 393 393 nattr.getValueFrom = orange.ClassifierByLookupTable2(nattr, testTable.domain[0], testTable.domain[1]) 394 394 for i in range(NUMBER_OF_INTERVALS*NUMBER_OF_INTERVALS): nattr.getValueFrom.lookupTable[i] = i 395 395 396 396 for dist in orange.ContingencyAttrClass(nattr, testTable): 397 397 dist = list(dist) … … 431 431 prediction[res.actualClass] += val 432 432 countsByFold[res.iterationNumber] += 1 433 433 434 434 elif self.qualityMeasure == CLASS_ACCURACY: 435 435 #return 100*orngStat.CA(results)[0], results … … 440 440 elif self.qualityMeasure == AUC: 441 441 return orngStat.AUC(results)[0], None 442 442 443 443 # compute accuracy only for classes that are selected as interesting. other class values do not participate in projection evaluation 444 444 acc = sum(prediction) / float(max(1, len(results.results))) # accuracy over all class values … … 451 451 452 452 return val/max(1, float(s)), (acc, prediction, list(currentClassDistribution)) 453 453 454 454 455 455 # Argumentation functions … … 503 503 504 504 if 1 in [example[attr].isSpecial() for attr in attrList]: return None, None 505 505 506 506 attrIndices = [self.attributeNameIndex[attr] for attr in attrList] 507 507 attrVals = [self.graph.scaleExampleValue(example, ind) for ind in attrIndices] … … 509 509 table = self.graph.createProjectionAsExampleTable(attrIndices, settingsDict = generalDict) 510 510 [xTest, yTest] = self.graph.getProjectedPointPosition(attrIndices, attrVals, settingsDict = generalDict) 511 511 512 512 learner = self.externalLearner or self.createkNNLearner(k = kValue) 513 513 if self.useExampleWeighting: table, weightID = orange.Preprocessor_addClassWeight(table, equalize=1) 514 514 else: weightID = 0 515 515 516 516 classifier = learner(table, weightID) 517 517 classVal, dist = classifier(orange.Example(table.domain, [xTest, yTest, "?"]), orange.GetBoth) 518 518 return classVal, dist 519 519 520 520 521 521 def getArgumentIndex(self, value, classValue): … … 592 592 self.evaluationData["attrs"] = attributes 593 593 self.totalPossibilities = 0 594 594 595 595 # build list of indices for permutations of different number of attributes 596 596 permutationIndices = {} … … 630 630 if self.attrSubsetSelection == DETERMINISTIC_ALL: 631 631 self.evaluationData["z"] = (u >= maxLength and z+1) or z 632 632 633 633 self.evaluationData["combinations"] = combinations 634 634 return combinations … … 641 641 triedDict = self.evaluationData.get("triedCombinations", {}) 642 642 projCountWidth = len(triedDict.keys()) / 1000 643 643 644 644 if self.attrCont == CONT_MEAS_S2NMIX or self.attrSubsetSelection == GAMMA_SINGLE: 645 645 numClasses = len(self.data.domain.classVar.values) 646 646 attributes, attrsByClass = self.evaluationData["attrs"] 647 647 648 648 for i in range(maxTries): 649 649 attrList = [[] for c in range(numClasses)]; attrs = [] … … 677 677 return None 678 678 679 # generate possible permutations of the current attribute subset. use evaluationData dict to find which attribute subset to use. 679 # generate possible permutations of the current attribute subset. use evaluationData dict to find which attribute subset to use. 680 680 def getNextPermutations(self): 681 681 combinations = self.evaluationData["combinations"] … … 698 698 usedPerms[tuple(comb)] = 1 699 699 permutations.append(comb) 700 700 701 701 # create only one permutation, because its all we need 702 702 elif self.useSupervisedPCA: … … 730 730 def evaluateProjections(self): 731 731 random.seed(0) # always use the same seed to make results repeatable 732 if not self.data: return 732 if not self.data: return 0 733 733 self.correctSettingsIfNecessary() 734 734 if self.timeLimit == self.projectionLimit == 0 and self.__class__.__name__ == "VizRank": 735 735 print "Evaluation of projections was started without any time or projection restrictions. To prevent an indefinite projection evaluation a time limit of 2 hours was set." 736 736 self.timeLimit = 2 * 60 737 737 738 738 self.evaluatedProjectionsCount = 0 739 739 self.optimizedProjectionsCount = 0 … … 747 747 748 748 if self.__class__.__name__ == "OWVizRank": 749 from qt import qApp, QMessageBox 750 if self.attributeCount >= 10 and not (self.useSupervisedPCA) and self.visualizationMethod != SCATTERPLOT and self.attrSubsetSelection != GAMMA_SINGLE and QMessageBox.critical(self, 'VizRank', 'You chose to evaluate projections with a high number of attributes. Since VizRank has to evaluate different placements\nof these attributes there will be a high number of projections to evaluate. Do you still want to proceed?','Continue','Cancel', '', 0,1): 751 return 752 self.disableControls() 753 self.parentWidget.progressBarInit() 754 elif not self.data.domain.classVar or not self.data.domain.classVar.varType == orange.VarTypes.Discrete: 749 from qt import qApp 750 751 if not self.data.domain.classVar or not self.data.domain.classVar.varType == orange.VarTypes.Discrete: 755 752 print "Projections can be evaluated only for data with a discrete class." 756 return 757 753 return 0 754 758 755 if self.visualizationMethod == SCATTERPLOT: 759 756 evaluatedAttributes = orngVisFuncts.evaluateAttributes(self.data, contMeasures[self.attrCont][1], discMeasures[self.attrDisc][1]) … … 764 761 count = len(evaluatedAttributes)*(len(evaluatedAttributes)1)/2 765 762 strCount = orngVisFuncts.createStringFromNumber(count) 766 763 767 764 for i in range(len(evaluatedAttributes)): 768 765 for j in range(i): … … 770 767 self.evaluatedProjectionsCount += 1 771 768 if self.isEvaluationCanceled(): 772 self.finishEvaluation(self.evaluatedProjectionsCount) 773 return 774 769 return self.evaluatedProjectionsCount 770 775 771 table = self.graph.createProjectionAsExampleTable([attr1, attr2]) 776 772 if len(table) < self.minNumOfExamples: continue 777 773 accuracy, other_results = self.kNNComputeAccuracy(table) 778 774 self.addResult(accuracy, other_results, len(table), [self.data.domain[attr1].name, self.data.domain[attr2].name], self.evaluatedProjectionsCount, {}) 779 775 780 776 if self.__class__.__name__ == "OWVizRank": 781 777 self.setStatusBarText("Evaluated %s/%s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount), strCount)) … … 787 783 self.freeviz.useGeneralizedEigenvectors = 1 788 784 self.graph.normalizeExamples = 0 789 785 790 786 # replace attribute names with indices in domain  faster searching 791 classIndex = self.attributeNameIndex[self.data.domain.classVar.name] 787 classIndex = self.attributeNameIndex[self.data.domain.classVar.name] 792 788 793 789 # variables and domain for the table … … 803 799 newProjectionsExist = 1 804 800 while newProjectionsExist: 805 for experiment in range(maxLengthminLength+1): 806 if self.selectNextAttributeSubset(minLength, maxLength) != None: break807 newProjectionsExist = 0 801 for experiment in range(maxLengthminLength+1): 802 if self.selectNextAttributeSubset(minLength, maxLength): break 803 newProjectionsExist = 0 808 804 permutations = self.getNextPermutations() 809 805 while permutations: 810 806 attrIndices = permutations[0] 811 807 812 808 if self.useSupervisedPCA: 813 809 xanchors, yanchors, (attrNames, newIndices) = self.freeviz.findSPCAProjection(attrIndices, setGraphAnchors = 0, percentDataUsed = self.percentDataUsed) … … 817 813 accuracy, other_results = self.kNNComputeAccuracy(table) 818 814 self.addResult(accuracy, other_results, len(table), attrNames, self.evaluatedProjectionsCount, generalDict = {"XAnchors": list(xanchors), "YAnchors": list(yanchors)}) 819 if self.isEvaluationCanceled(): self.finishEvaluation(self.evaluatedProjectionsCount); return815 if self.isEvaluationCanceled(): return self.evaluatedProjectionsCount 820 816 if self.__class__.__name__ == "OWVizRank": self.setStatusBarText("Evaluated %s projections..." % (orngVisFuncts.createStringFromNumber(self.evaluatedProjectionsCount))) 821 817 else: … … 823 819 YAnchors = self.graph.createYAnchors(len(attrIndices)) 824 820 validData = self.graph.getValidList(attrIndices) 825 if numpy.sum(validData) >= self.minNumOfExamples: 821 if numpy.sum(validData) >= self.minNumOfExamples: 826 822 classList = numpy.compress(validData, classListFull) 827 823 selectedData = numpy.compress(validData, numpy.take(self.graph.noJitteringScaledData, attrIndices, axis = 0), axis = 1) … … 837 833 table = self.graph.createProjectionAsExampleTable(permutation, validData = validData, classList = classList, sum_i = sum_i, XAnchors = XAnchors, YAnchors = YAnchors, domain = domain) 838 834 accuracy, other_results = self.kNNComputeAccuracy(table) 839 835 840 836 # save the permutation 841 837 if self.storeEachPermutation: … … 854 850 855 851 if self.isEvaluationCanceled(): 856 self.finishEvaluation(self.evaluatedProjectionsCount) 857 return 858 859 permutations = self.getNextPermutations() 852 return self.evaluatedProjectionsCount 853 854 permutations = self.getNextPermutations() 860 855 else: 861 856 print "unknown visualization method" 862 857 863 self.finishEvaluation(self.evaluatedProjectionsCount) 864 865 866 def finishEvaluation(self, evaluatedProjections): 867 if self.__class__.__name__ == "OWVizRank": 868 secs = time.time()  self.startTime 869 self.setStatusBarText("Finished evaluation (evaluated %s projections in %d min, %d sec)" % (orngVisFuncts.createStringFromNumber(evaluatedProjections), secs/60, secs%60)) 870 self.parentWidget.progressBarFinished() 871 self.enableControls() 872 self.finishedAddingResults() 873 from qt import qApp; qApp.processEvents() 874 if self.parentWidget: self.parentWidget.showSelectedAttributes() 875 858 return self.evaluatedProjectionsCount 876 859 877 860 def getProjectionQuality(self, attrList, useAnchorData = 0): … … 883 866 def insertTempProjection(self, projections, acc, attrList): 884 867 if len(projections) == 0: return [(acc, attrList)] 885 868 886 869 top = 0; bottom = len(projections) 887 870 while (bottomtop) > 1: … … 907 890 self.optimizeProjectionLimit = 2 * 60 908 891 """ 909 910 if self.__class__.__name__ == "OWVizRank": 911 self.disableControls() 892 893 if self.__class__.__name__ == "OWVizRank": 912 894 from qt import qApp 913 895 914 896 attrs = [self.results[i][ATTR_LIST] for i in range(count)] # create a list of attributes that are in the top projections 915 897 attrs = [[self.attributeNameIndex[name] for name in projection] for projection in attrs] # find indices from the attribute names … … 940 922 if not tempDict.has_key((projection[0], attr)) and not tempDict.has_key((attr, projection[0])): testProjections.append([projection[0], attr]) 941 923 if not tempDict.has_key((projection[1], attr)) and not tempDict.has_key((attr, projection[1])): testProjections.append([attr, projection[1]]) 942 924 943 925 for testProj in testProjections: 944 926 table = self.graph.createProjectionAsExampleTable(testProj, domain = domain) … … 949 931 if acc > accuracy: 950 932 self.addResult(acc, other_results, len(table), [self.graph.attributeNames[i] for i in testProj], projIndex) 951 self.insertTempProjection(projections, acc, proj)952 tempDict[tuple( proj)] = 1933 self.insertTempProjection(projections, acc, testProj) 934 tempDict[tuple(testProj)] = 1 953 935 if max(acc, accuracy)/min(acc, accuracy) > 1.005: significantImprovement = 1 954 936 955 937 self.optimizedProjectionsCount += 1 956 938 if self.__class__ != VizRank: qApp.processEvents() # allow processing of other events 957 939 if self.optimizedProjectionsCount % 10 == 0 and self.isOptimizationCanceled(): 958 self.finishEvaluation(self.optimizedProjectionsCount) 959 return 940 return self.optimizedProjectionsCount 960 941 if significantImprovement: break 961 942 … … 992 973 failedConsecutiveTries = 0 993 974 triedPermutationsDict[str(newProj)] = 1 994 975 995 976 table = self.graph.createProjectionAsExampleTable(newProj, validData = validData, classList = classList, XAnchors = XAnchors, YAnchors = YAnchors, domain = domain) 996 977 if len(table) < self.minNumOfExamples: continue … … 998 979 self.optimizedProjectionsCount += 1 999 980 if self.__class__ != VizRank: qApp.processEvents() # allow processing of other events 1000 if self.isOptimizationCanceled(): self.finishEvaluation(self.optimizedProjectionsCount); return981 if self.isOptimizationCanceled(): return self.optimizedProjectionsCount 1001 982 if hasattr(self, "setStatusBarText") and self.optimizedProjectionsCount % 10 == 0: 1002 983 self.setStatusBarText("Evaluated %s projections. Last accuracy was: %2.2f%%" % (orngVisFuncts.createStringFromNumber(self.optimizedProjectionsCount), acc)) … … 1018 999 for iteration in range(2): 1019 1000 if iteration == 1 and not self.locOptAllowAddingAttributes: continue # if we are not allowed to increase the number of visualized attributes 1020 if (len(projection) + iteration > self.locOptMaxAttrsInProj): continue 1001 if (len(projection) + iteration > self.locOptMaxAttrsInProj): continue 1021 1002 strTotalAtts = orngVisFuncts.createStringFromNumber(lenOfAttributes) 1022 1003 for (attrIndex, attr) in enumerate(attributes): … … 1041 1022 self.optimizedProjectionsCount += 1 1042 1023 acc, other_results = self.kNNComputeAccuracy(table) 1043 1024 1044 1025 tempList.append((acc, other_results, len(table), newIndices, {"XAnchors": xanchors, "YAnchors": yanchors})) 1045 1026 if self.storeEachPermutation: … … 1047 1028 1048 1029 if self.__class__ != VizRank: qApp.processEvents() # allow processing of other events 1049 if self.isOptimizationCanceled(): self.finishEvaluation(self.optimizedProjectionsCount); return1030 if self.isOptimizationCanceled(): return self.optimizedProjectionsCount 1050 1031 1051 1032 # ordinary radviz projections … … 1063 1044 validData = self.graph.getValidList(testProjections[0]) 1064 1045 classList = numpy.compress(validData, classListFull) 1065 1046 1066 1047 for testProj in testProjections: 1067 1048 if newProjDict.has_key(str(testProj)): continue 1068 1049 newProjDict[str(testProj)] = 1 1069 1050 1070 1051 table = self.graph.createProjectionAsExampleTable(testProj, validData = validData, classList = classList, XAnchors = XAnchors, YAnchors = YAnchors, domain = domain) 1071 1052 if len(table) < self.minNumOfExamples: continue … … 1079 1060 self.optimizedProjectionsCount += 1 1080 1061 if self.__class__ != VizRank: qApp.processEvents() # allow processing of other events 1081 if self.isOptimizationCanceled(): self.finishEvaluation(self.optimizedProjectionsCount); return1062 if self.isOptimizationCanceled(): return self.optimizedProjectionsCount 1082 1063 1083 1064 # return only the best attribute placements … … 1093 1074 print "unknown visualization method" 1094 1075 1095 self.finishEvaluation(self.optimizedProjectionsCount)1076 return self.optimizedProjectionsCount 1096 1077 1097 1078 # ############################################################## … … 1112 1093 # open, write and save file 1113 1094 file = open(name, "wt") 1114 1095 1115 1096 attrs = ["kValue", "percentDataUsed", "qualityMeasure", "testingMethod", "parentName", "evaluationAlgorithm", "useExampleWeighting", "useSupervisedPCA", "attrSubsetSelection", "optimizationType", "attributeCount", "attrDisc", "attrCont", "timeLimit", "projectionLimit"] 1116 1097 dict = {} … … 1118 1099 dict["dataCheckSum"] = self.data.checksum() 1119 1100 dict["totalProjectionsEvaluated"] = self.evaluatedProjectionsCount + self.optimizedProjectionsCount # let's also save the total number of projections that we evaluated in order to get this list 1120 1101 1121 1102 file.write("%s\n%s\n" % (str(dict), str(self.selectedClasses))) 1122 1103 … … 1126 1107 1127 1108 (acc, other_results, lenTable, attrList, tryIndex, generalDict) = results[i] 1128 1109 1129 1110 s = "(%.3f, (" % (acc) 1130 1111 for val in other_results: … … 1159 1140 if self.__class__.__name__ == "OWVizRank": 1160 1141 import qt 1161 1142 1162 1143 file = open(name, "rt") 1163 1144 settings = eval(file.readline()[:1]) … … 1176 1157 else: 1177 1158 print "The data set has a different checksum than the data set that was used in projection evaluation. Projection might be invalid but the file will be loaded anyway..." 1178 1159 1179 1160 for key in settings.keys(): 1180 1161 setattr(self, key, settings[key]) 1181 1182 # find if it was computed for specific class values 1162 1163 # find if it was computed for specific class values 1183 1164 selectedClasses = eval(file.readline()[:1]) 1184 1165 1185 1166 count = 0 1186 1167 for line in file.xreadlines(): … … 1221 1202 1222 1203 if self.VizRank.__class__.__name__ == "OWVizRank": 1223 self.VizRank.parentWidget. cdata(data)1204 self.VizRank.parentWidget.setData(data) 1224 1205 #self.VizRank.useTimeLimit = 1 1225 1206 self.VizRank.timeLimit = self.VizRank.evaluationTime … … 1232 1213 1233 1214 self.VizRank.evaluateProjections() 1234 1235 # do we want to optimize current projection. if yes then spend the same amount of time to optimize it 1215 1216 # do we want to optimize current projection. if yes then spend the same amount of time to optimize it 1236 1217 if self.VizRank.optimizeTimeLimit > 0 or self.VizRank.optimizeProjectionLimit: 1237 1218 self.VizRank.optimizeBestProjections() … … 1241 1222 1242 1223 1243 # for a given example run argumentation and find out to which class it most often fall 1224 # for a given example run argumentation and find out to which class it most often fall 1244 1225 def __call__(self, example, returnType = orange.GetBoth): 1245 1226 if self.VizRank.__class__.__name__ == "OWVizRank": … … 1250 1231 else: 1251 1232 classVal, dist = self.VizRank.findArguments(example) 1252 1233 1253 1234 if returnType == orange.GetBoth: return classVal, dist 1254 1235 else: return classVal 1255 1236 1256 1237 1257 1238 # ############################################################################# … … 1263 1244 self.VizRank = vizrank 1264 1245 self.name = self.VizRank.learnerName 1265 1266 1246 1247 1267 1248 def __call__(self, examples, weightID = 0): 1268 1249 return VizRankClassifier(self.VizRank, examples) … … 1293 1274 #vizrank.attrSubsetSelection = GAMMA_SINGLE 1294 1275 vizrank.attrSubsetSelection = DETERMINISTIC_ALL 1295 1276 1296 1277 #vizrank.attrCont = CONT_MEAS_S2N 1297 1278 vizrank.attrCont = CONT_MEAS_S2NMIX 1298 1279 1299 1280 #vizrank.storeEachPermutation = 1 1300 1281 #vizrank.load(r"E:\Development\Python23\Lib\sitepackages\Orange\Datasets\microarray\cancer\leukemia  Radviz  test.proj") … … 1303 1284 vizrank.evaluateProjections() 1304 1285 #vizrank.findArguments(data[0]) 1305 1286
Note: See TracChangeset
for help on using the changeset viewer.