Changeset 9629:316d2bdcf34a in orange for orange/Orange/network/community.py
 Timestamp:
 02/04/12 19:00:53 (2 years ago)
 Branch:
 default
 rebase_source:
 45f2bb328e9f62a4232511d95325038d8c9fb22a
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/Orange/network/community.py
r8042 r9629 1 """ 1 """ 2 2 .. index:: community detection in graphs 3 3 … … 14 14 import itertools 15 15 16 import Orange 16 import Orange.core 17 import Orange.data 18 import Orange.network 19 17 20 18 21 def add_results_to_items(G, lblhistory): 22 items = G.items() 23 if items is not None and 'clustering label propagation' in items.domain: 24 exclude = [attr for attr in items.domain if attr.name == \ 25 'clustering label propagation'] 26 items = Orange.core.Preprocessor_ignore(items, attributes=exclude) 27 19 28 attrs = [Orange.data.variable.Discrete('clustering label propagation', 20 21 29 values=list(set([l for l in lblhistory[1]])))] 30 22 31 dom = Orange.data.Domain(attrs, 0) 23 32 data = Orange.data.Table(dom, [[l] for l in lblhistory[1]]) 24 if G.items() is None: 25 G.set_items(data) 26 else: 27 G.set_items(Orange.data.Table([G.items(), data])) 28 33 34 if items is None: 35 G.set_items(data) 36 else: 37 G.set_items(Orange.data.Table([items, data])) 38 39 29 40 def add_history_to_items(G, lblhistory): 30 attrs = [Orange.data.variable.Discrete('c'+ str(i), values=list(set( \ 31 [l for l in lblhistory[0]]))) for i,lbls in enumerate(lblhistory)] 32 41 items = G.items() 42 if items is not None: 43 exclude = [attr for attr in items.domain if attr.name in \ 44 ['c' + str(i) for i in range(1000)]] 45 items = Orange.core.Preprocessor_ignore(items, attributes=exclude) 46 47 attrs = [Orange.data.variable.Discrete('c' + str(i), values=list(set(\ 48 [l for l in lblhistory[0]]))) for i, _ in enumerate(lblhistory)] 49 33 50 dom = Orange.data.Domain(attrs, 0) 34 51 # transpose history 35 52 data = map(list, zip(*lblhistory)) 36 53 data = Orange.data.Table(dom, data) 37 if G.items() is None: 38 G.set_items(data) 39 else: 40 G.set_items(Orange.data.Table([G.items(), data])) 41 42 def label_propagation_hop_attenuation(G, results2items=0, resultHistory2items=0, iterations=1000, delta=0.1, node_degree_preference=0): 54 if items is None: 55 G.set_items(data) 56 else: 57 G.set_items(Orange.data.Table([items, data])) 58 59 60 def label_propagation_hop_attenuation(G, results2items=0, \ 61 resultHistory2items=0, iterations=1000, \ 62 delta=0.1, node_degree_preference=0): 43 63 """Label propagation for community detection, Leung et al., 2009 44 64 45 65 :param G: A Orange graph. 46 66 :type G: Orange.network.Graph 47 48 :param results2items: Append a new feature result to items 67 68 :param results2items: Append a new feature result to items 49 69 (Orange.data.Table). 50 70 :type results2items: bool 51 52 :param resultHistory2items: Append new features result to items 71 72 :param resultHistory2items: Append new features result to items 53 73 (Orange.data.Table) after each iteration of the algorithm. 54 74 :type resultHistory2items: bool 55 56 :param iterations: The max imum number of iterations if there is no convergence.75 76 :param iterations: The max. number of iterations if no convergence. 57 77 :type iterations: int 58 59 :param delta: The hop attenuation factor. 78 79 :param delta: The hop attenuation factor. 60 80 :type delta: float 61 62 :param node_degree_preference: The power on node degree factor. 81 82 :param node_degree_preference: The power on node degree factor. 63 83 :type node_degree_preference: float 64 84 65 85 """ 66 86 67 87 if G.is_directed(): 68 88 raise Orange.network.nx.NetworkXError("""Not allowed for directed graph 69 89 G Use UG=G.to_undirected() to create an undirected graph.""") 70 90 71 91 vertices = G.nodes() 72 92 degrees = dict(zip(vertices, [G.degree(v) for v in vertices])) … … 75 95 lblhistory = [] 76 96 m = node_degree_preference 77 97 78 98 for i in range(iterations): 79 99 random.shuffle(vertices) … … 83 103 if len(neighbors) == 0: 84 104 continue 85 86 lbls = sorted(((G.edge[v][u].get('weight', 1), labels[u], u) for u in neighbors), key=lambda x: x[1]) 87 lbls = [(sum(scores[u] * degrees[u]**m * weight for weight, u_label, u in group), label) for label, group in itertools.groupby(lbls, lambda x: x[1])] 105 106 lbls = sorted(((G.edge[v][u].get('weight', 1), labels[u], u) \ 107 for u in neighbors), key=lambda x: x[1]) 108 lbls = [(sum(scores[u] * degrees[u] ** m * weight for weight, \ 109 _u_label, u in group), label) for label, group in \ 110 itertools.groupby(lbls, lambda x: x[1])] 88 111 max_score = max(lbls)[0] 89 112 max_lbls = [label for score, label in lbls if score >= max_score] 90 91 # only change label if it is not already one of the 113 114 # only change label if it is not already one of the 92 115 # preferred (maximal) labels 93 116 if labels[v] not in max_lbls: 94 117 labels[v] = random.choice(max_lbls) 95 scores[v] = max(0, max(scores[u] for u in neighbors if labels[u] == labels[v])  delta) 118 scores[v] = max(0, max(scores[u] for u in neighbors \ 119 if labels[u] == labels[v])  delta) 96 120 stop = 0 97 121 98 122 lblhistory.append([str(labels[key]) for key in sorted(labels.keys())]) 99 # if stopping condition is satisfied (no ne of the labelsswitched color)123 # if stopping condition is satisfied (no label switched color) 100 124 if stop: 101 125 break 102 126 103 127 if results2items and not resultHistory2items: 104 128 add_results_to_items(G, lblhistory) 105 129 106 130 if resultHistory2items: 107 131 add_history_to_items(G, lblhistory) 108 109 print "iterations:", i132 133 #print "iterations:", i 110 134 return labels 111 135 112 def label_propagation(G, results2items=0, resultHistory2items=0, iterations=1000): 136 137 def label_propagation(G, results2items=0, \ 138 resultHistory2items=0, iterations=1000): 113 139 """Label propagation for community detection, Raghavan et al., 2007 114 140 115 141 :param G: A Orange graph. 116 142 :type G: Orange.network.Graph 117 118 :param results2items: Append a new feature result to items 143 144 :param results2items: Append a new feature result to items 119 145 (Orange.data.Table). 120 146 :type results2items: bool 121 122 :param resultHistory2items: Append new features result to items 147 148 :param resultHistory2items: Append new features result to items 123 149 (Orange.data.Table) after each iteration of the algorithm. 124 150 :type resultHistory2items: bool 125 151 126 152 :param iterations: The maximum number of iterations if there is no convergence. 127 153 :type iterations: int 128 154 129 155 """ 130 156 131 157 def next_label(neighbors): 132 158 """Updating rule as described by Raghavan et al., 2007 133 159 134 160 Return a list of possible node labels with equal probability. 135 161 """ … … 138 164 m = max(lbls)[0] 139 165 return [l for c, l in lbls if c >= m] 140 166 141 167 vertices = G.nodes() 142 labels = dict(zip(vertices, range(G.number_of_nodes()))) 168 labels = dict(zip(vertices, range(G.number_of_nodes()))) 143 169 lblhistory = [] 144 170 for i in range(iterations): … … 149 175 if len(nbh) == 0: 150 176 continue 151 177 152 178 max_lbls = next_label(nbh) 153 179 154 180 if labels[v] not in max_lbls: 155 181 stop = 0 156 182 157 183 labels[v] = random.choice(max_lbls) 158 184 159 185 lblhistory.append([str(labels[key]) for key in sorted(labels.keys())]) 160 186 # if stopping condition might be satisfied, check it … … 163 189 for v in vertices: 164 190 nbh = G.neighbors(v) 165 if len(nbh) == 0: 191 if len(nbh) == 0: 166 192 continue 167 193 max_lbls = next_label(nbh) 168 if labels[v] not in max_lbls: 194 if labels[v] not in max_lbls: 169 195 stop = 0 170 196 break 171 172 if stop: break 173 197 198 if stop: 199 break 200 174 201 if results2items and not resultHistory2items: 175 202 add_results_to_items(G, lblhistory) 176 203 177 204 if resultHistory2items: 178 205 add_history_to_items(G, lblhistory) 179 180 print "iterations:", i206 207 #print "iterations:", i 181 208 return labels
Note: See TracChangeset
for help on using the changeset viewer.