source: orange/Orange/network/community.py @ 10904:0e7e24edfbb2

Revision 10904:0e7e24edfbb2, 6.9 KB checked in by mstajdohar, 23 months ago (diff)

Added random seed feature.

Line 
1"""
2.. index:: community detection in graphs
3
4.. index::
5   single: network; community detection in graphs
6
7*****************************
8Community detection in graphs
9*****************************
10
11"""
12
13import random
14import itertools
15
16import Orange.core
17import Orange.data
18import Orange.network
19
20
21def add_results_to_items(G, lblhistory):
22    items = G.items()
23    if items is not None and 'clustering label propagation' in items.domain:
24        exclude = [attr for attr in items.domain if attr.name == \
25                   'clustering label propagation']
26        items = Orange.core.Preprocessor_ignore(items, attributes=exclude)
27
28    attrs = [Orange.feature.Discrete('clustering label propagation',
29                            values=list(set([l for l in lblhistory[-1]])))]
30
31    dom = Orange.data.Domain(attrs, 0)
32    data = Orange.data.Table(dom, [[l] for l in lblhistory[-1]])
33
34    if items is None:
35        G.set_items(data)
36    else:
37        G.set_items(Orange.data.Table([items, data]))
38
39
40def add_history_to_items(G, lblhistory):
41    items = G.items()
42    if items is not None:
43        exclude = [attr for attr in items.domain if attr.name in \
44                   ['c' + str(i) for i in range(1000)]]
45        items = Orange.core.Preprocessor_ignore(items, attributes=exclude)
46
47    attrs = [Orange.feature.Discrete('c' + str(i), values=list(set(\
48            [l for l in lblhistory[0]]))) for i, _ in enumerate(lblhistory)]
49
50    dom = Orange.data.Domain(attrs, 0)
51    # transpose history
52    data = map(list, zip(*lblhistory))
53    data = Orange.data.Table(dom, data)
54    if items is None:
55        G.set_items(data)
56    else:
57        G.set_items(Orange.data.Table([items, data]))
58
59
60class CommunityDetection(object):
61
62    def __init__(self, algorithm, **kwargs):
63        self.algorithm = algorithm
64        self.kwargs = kwargs
65
66    def __call__(self, G):
67        return self.algorithm(G, **self.kwargs)
68
69def label_propagation_hop_attenuation(G, results2items=0, \
70                                      resultHistory2items=0, iterations=1000, \
71                                      delta=0.1, node_degree_preference=0):
72    """Label propagation for community detection, Leung et al., 2009
73
74    :param G: A Orange graph.
75    :type G: Orange.network.Graph
76
77    :param results2items: Append a new feature result to items
78        (Orange.data.Table).
79    :type results2items: bool
80
81    :param resultHistory2items: Append new features result to items
82        (Orange.data.Table) after each iteration of the algorithm.
83    :type resultHistory2items: bool
84
85    :param iterations: The max. number of iterations if no convergence.
86    :type iterations: int
87
88    :param delta: The hop attenuation factor.
89    :type delta: float
90
91    :param node_degree_preference: The power on node degree factor.
92    :type node_degree_preference: float
93
94    """
95
96    if G.is_directed():
97        raise Orange.network.nx.NetworkXError("""Not allowed for directed graph
98              G Use UG=G.to_undirected() to create an undirected graph.""")
99
100    vertices = G.nodes()
101    degrees = dict(zip(vertices, [G.degree(v) for v in vertices]))
102    labels = dict(zip(vertices, range(G.number_of_nodes())))
103    scores = dict(zip(vertices, [1] * G.number_of_nodes()))
104    lblhistory = []
105    m = node_degree_preference
106
107    for i in range(iterations):
108        random.shuffle(vertices)
109        stop = 1
110        for v in vertices:
111            neighbors = G.neighbors(v)
112            if len(neighbors) == 0:
113                continue
114
115            lbls = sorted(((G.edge[v][u].get('weight', 1), labels[u], u) \
116                           for u in neighbors), key=lambda x: x[1])
117            lbls = [(sum(scores[u] * degrees[u] ** m * weight for weight, \
118                         _u_label, u in group), label) for label, group in \
119                    itertools.groupby(lbls, lambda x: x[1])]
120            max_score = max(lbls)[0]
121            max_lbls = [label for score, label in lbls if score >= max_score]
122
123            # only change label if it is not already one of the
124            # preferred (maximal) labels
125            if labels[v] not in max_lbls:
126                labels[v] = random.choice(max_lbls)
127                scores[v] = max(0, max(scores[u] for u in neighbors \
128                                       if labels[u] == labels[v]) - delta)
129                stop = 0
130
131        lblhistory.append([str(labels[key]) for key in sorted(labels.keys())])
132        # if stopping condition is satisfied (no label switched color)
133        if stop:
134            break
135
136    if results2items and not resultHistory2items:
137        add_results_to_items(G, lblhistory)
138
139    if resultHistory2items:
140        add_history_to_items(G, lblhistory)
141
142    #print "iterations:", i
143    return labels
144
145
146def label_propagation(G, results2items=0, \
147                      resultHistory2items=0, iterations=1000, seed=None):
148    """Label propagation for community detection, Raghavan et al., 2007
149
150    :param G: A Orange graph.
151    :type G: Orange.network.Graph
152
153    :param results2items: Append a new feature result to items
154        (Orange.data.Table).
155    :type results2items: bool
156
157    :param resultHistory2items: Append new features result to items
158        (Orange.data.Table) after each iteration of the algorithm.
159    :type resultHistory2items: bool
160
161    :param iterations: The maximum number of iterations if there is no convergence.
162    :type iterations: int
163
164    """
165
166    if seed is not None:
167        random.seed(seed)
168
169    vertices = sorted(G.nodes_iter())
170    labels = dict(zip(vertices, range(G.number_of_nodes())))
171
172    def next_label(neighbors):
173        """Updating rule as described by Raghavan et al., 2007
174
175        Return a list of possible node labels with equal probability.
176        """
177        lbls = sorted(labels[u] for u in neighbors)
178        lbls = [(len(list(c)), l) for l, c in itertools.groupby(lbls)]
179        m = max(lbls)[0]
180        return [l for c, l in lbls if c >= m]
181
182    lblhistory = []
183    for i in range(iterations):
184        random.shuffle(vertices)
185        stop = 1
186        for v in vertices:
187            nbh = G.neighbors(v)
188            if len(nbh) == 0:
189                continue
190
191            max_lbls = next_label(nbh)
192
193            if labels[v] not in max_lbls:
194                stop = 0
195
196            labels[v] = random.choice(max_lbls)
197
198        lblhistory.append([str(labels[key]) for key in sorted(labels.keys())])
199        # if stopping condition might be satisfied, check it
200        # stop when no label would switch anymore
201        if stop:
202            for v in vertices:
203                nbh = G.neighbors(v)
204                if len(nbh) == 0:
205                    continue
206                max_lbls = next_label(nbh)
207                if labels[v] not in max_lbls:
208                    stop = 0
209                    break
210
211            if stop:
212                break
213
214    if results2items and not resultHistory2items:
215        add_results_to_items(G, lblhistory)
216
217    if resultHistory2items:
218        add_history_to_items(G, lblhistory)
219
220    #print "iterations:", i
221    return labels
Note: See TracBrowser for help on using the repository browser.