source: orange/orange/Orange/network/community.py @ 9629:316d2bdcf34a

Revision 9629:316d2bdcf34a, 6.6 KB checked in by Miha Stajdohar <miha.stajdohar@…>, 2 years ago (diff)

Fixed a bug when appending clustering results multiple times.

Line 
1"""
2.. index:: community detection in graphs
3
4.. index::
5   single: network; community detection in graphs
6
7*****************************
8Community detection in graphs
9*****************************
10
11"""
12
13import random
14import itertools
15
16import Orange.core
17import Orange.data
18import Orange.network
19
20
21def add_results_to_items(G, lblhistory):
22    items = G.items()
23    if items is not None and 'clustering label propagation' in items.domain:
24        exclude = [attr for attr in items.domain if attr.name == \
25                   'clustering label propagation']
26        items = Orange.core.Preprocessor_ignore(items, attributes=exclude)
27
28    attrs = [Orange.data.variable.Discrete('clustering label propagation',
29                            values=list(set([l for l in lblhistory[-1]])))]
30
31    dom = Orange.data.Domain(attrs, 0)
32    data = Orange.data.Table(dom, [[l] for l in lblhistory[-1]])
33
34    if items is None:
35        G.set_items(data)
36    else:
37        G.set_items(Orange.data.Table([items, data]))
38
39
40def add_history_to_items(G, lblhistory):
41    items = G.items()
42    if items is not None:
43        exclude = [attr for attr in items.domain if attr.name in \
44                   ['c' + str(i) for i in range(1000)]]
45        items = Orange.core.Preprocessor_ignore(items, attributes=exclude)
46
47    attrs = [Orange.data.variable.Discrete('c' + str(i), values=list(set(\
48            [l for l in lblhistory[0]]))) for i, _ in enumerate(lblhistory)]
49
50    dom = Orange.data.Domain(attrs, 0)
51    # transpose history
52    data = map(list, zip(*lblhistory))
53    data = Orange.data.Table(dom, data)
54    if items is None:
55        G.set_items(data)
56    else:
57        G.set_items(Orange.data.Table([items, data]))
58
59
60def label_propagation_hop_attenuation(G, results2items=0, \
61                                      resultHistory2items=0, iterations=1000, \
62                                      delta=0.1, node_degree_preference=0):
63    """Label propagation for community detection, Leung et al., 2009
64
65    :param G: A Orange graph.
66    :type G: Orange.network.Graph
67
68    :param results2items: Append a new feature result to items
69        (Orange.data.Table).
70    :type results2items: bool
71
72    :param resultHistory2items: Append new features result to items
73        (Orange.data.Table) after each iteration of the algorithm.
74    :type resultHistory2items: bool
75
76    :param iterations: The max. number of iterations if no convergence.
77    :type iterations: int
78
79    :param delta: The hop attenuation factor.
80    :type delta: float
81
82    :param node_degree_preference: The power on node degree factor.
83    :type node_degree_preference: float
84
85    """
86
87    if G.is_directed():
88        raise Orange.network.nx.NetworkXError("""Not allowed for directed graph
89              G Use UG=G.to_undirected() to create an undirected graph.""")
90
91    vertices = G.nodes()
92    degrees = dict(zip(vertices, [G.degree(v) for v in vertices]))
93    labels = dict(zip(vertices, range(G.number_of_nodes())))
94    scores = dict(zip(vertices, [1] * G.number_of_nodes()))
95    lblhistory = []
96    m = node_degree_preference
97
98    for i in range(iterations):
99        random.shuffle(vertices)
100        stop = 1
101        for v in vertices:
102            neighbors = G.neighbors(v)
103            if len(neighbors) == 0:
104                continue
105
106            lbls = sorted(((G.edge[v][u].get('weight', 1), labels[u], u) \
107                           for u in neighbors), key=lambda x: x[1])
108            lbls = [(sum(scores[u] * degrees[u] ** m * weight for weight, \
109                         _u_label, u in group), label) for label, group in \
110                    itertools.groupby(lbls, lambda x: x[1])]
111            max_score = max(lbls)[0]
112            max_lbls = [label for score, label in lbls if score >= max_score]
113
114            # only change label if it is not already one of the
115            # preferred (maximal) labels
116            if labels[v] not in max_lbls:
117                labels[v] = random.choice(max_lbls)
118                scores[v] = max(0, max(scores[u] for u in neighbors \
119                                       if labels[u] == labels[v]) - delta)
120                stop = 0
121
122        lblhistory.append([str(labels[key]) for key in sorted(labels.keys())])
123        # if stopping condition is satisfied (no label switched color)
124        if stop:
125            break
126
127    if results2items and not resultHistory2items:
128        add_results_to_items(G, lblhistory)
129
130    if resultHistory2items:
131        add_history_to_items(G, lblhistory)
132
133    #print "iterations:", i
134    return labels
135
136
137def label_propagation(G, results2items=0, \
138                      resultHistory2items=0, iterations=1000):
139    """Label propagation for community detection, Raghavan et al., 2007
140
141    :param G: A Orange graph.
142    :type G: Orange.network.Graph
143
144    :param results2items: Append a new feature result to items
145        (Orange.data.Table).
146    :type results2items: bool
147
148    :param resultHistory2items: Append new features result to items
149        (Orange.data.Table) after each iteration of the algorithm.
150    :type resultHistory2items: bool
151
152    :param iterations: The maximum number of iterations if there is no convergence.
153    :type iterations: int
154
155    """
156
157    def next_label(neighbors):
158        """Updating rule as described by Raghavan et al., 2007
159
160        Return a list of possible node labels with equal probability.
161        """
162        lbls = sorted(labels[u] for u in neighbors)
163        lbls = [(len(list(c)), l) for l, c in itertools.groupby(lbls)]
164        m = max(lbls)[0]
165        return [l for c, l in lbls if c >= m]
166
167    vertices = G.nodes()
168    labels = dict(zip(vertices, range(G.number_of_nodes())))
169    lblhistory = []
170    for i in range(iterations):
171        random.shuffle(vertices)
172        stop = 1
173        for v in vertices:
174            nbh = G.neighbors(v)
175            if len(nbh) == 0:
176                continue
177
178            max_lbls = next_label(nbh)
179
180            if labels[v] not in max_lbls:
181                stop = 0
182
183            labels[v] = random.choice(max_lbls)
184
185        lblhistory.append([str(labels[key]) for key in sorted(labels.keys())])
186        # if stopping condition might be satisfied, check it
187        # stop when no label would switch anymore
188        if stop:
189            for v in vertices:
190                nbh = G.neighbors(v)
191                if len(nbh) == 0:
192                    continue
193                max_lbls = next_label(nbh)
194                if labels[v] not in max_lbls:
195                    stop = 0
196                    break
197
198            if stop:
199                break
200
201    if results2items and not resultHistory2items:
202        add_results_to_items(G, lblhistory)
203
204    if resultHistory2items:
205        add_history_to_items(G, lblhistory)
206
207    #print "iterations:", i
208    return labels
Note: See TracBrowser for help on using the repository browser.