source: orange/orange/Orange/network/community.py @ 8042:ffcb93bc9028

Revision 8042:ffcb93bc9028, 6.1 KB checked in by markotoplak, 3 years ago (diff)

Hierarchical clustering: also catch RuntimeError when importing matplotlib (or the documentation could not be built on server).

Line 
1"""
2.. index:: community detection in graphs
3
4.. index::
5   single: network; community detection in graphs
6
7*****************************
8Community detection in graphs
9*****************************
10
11"""
12
13import random
14import itertools
15
16import Orange
17
18def add_results_to_items(G, lblhistory):
19    attrs = [Orange.data.variable.Discrete('clustering label propagation',
20                                values=list(set([l for l in lblhistory[-1]])))]
21   
22    dom = Orange.data.Domain(attrs, 0)
23    data = Orange.data.Table(dom, [[l] for l in lblhistory[-1]])
24    if G.items() is None:
25        G.set_items(data) 
26    else: 
27        G.set_items(Orange.data.Table([G.items(), data]))
28       
29def add_history_to_items(G, lblhistory):
30    attrs = [Orange.data.variable.Discrete('c'+ str(i), values=list(set( \
31            [l for l in lblhistory[0]]))) for i,lbls in enumerate(lblhistory)]
32   
33    dom = Orange.data.Domain(attrs, 0)
34    # transpose history
35    data = map(list, zip(*lblhistory))
36    data = Orange.data.Table(dom, data)
37    if G.items() is None:
38        G.set_items(data) 
39    else: 
40        G.set_items(Orange.data.Table([G.items(), data]))
41           
42def label_propagation_hop_attenuation(G, results2items=0, resultHistory2items=0, iterations=1000, delta=0.1, node_degree_preference=0):
43    """Label propagation for community detection, Leung et al., 2009
44   
45    :param G: A Orange graph.
46    :type G: Orange.network.Graph
47   
48    :param results2items: Append a new feature result to items
49        (Orange.data.Table).
50    :type results2items: bool
51   
52    :param resultHistory2items: Append new features result to items
53        (Orange.data.Table) after each iteration of the algorithm.
54    :type resultHistory2items: bool
55   
56    :param iterations: The maximum number of iterations if there is no convergence.
57    :type iterations: int
58   
59    :param delta: The hop attenuation factor.
60    :type delta: float
61   
62    :param node_degree_preference: The power on node degree factor.
63    :type node_degree_preference: float
64   
65    """
66   
67    if G.is_directed():
68        raise Orange.network.nx.NetworkXError("""Not allowed for directed graph
69              G Use UG=G.to_undirected() to create an undirected graph.""")
70   
71    vertices = G.nodes()
72    degrees = dict(zip(vertices, [G.degree(v) for v in vertices]))
73    labels = dict(zip(vertices, range(G.number_of_nodes())))
74    scores = dict(zip(vertices, [1] * G.number_of_nodes()))
75    lblhistory = []
76    m = node_degree_preference
77   
78    for i in range(iterations):
79        random.shuffle(vertices)
80        stop = 1
81        for v in vertices:
82            neighbors = G.neighbors(v)
83            if len(neighbors) == 0:
84                continue
85           
86            lbls = sorted(((G.edge[v][u].get('weight', 1), labels[u], u) for u in neighbors), key=lambda x: x[1])
87            lbls = [(sum(scores[u] * degrees[u]**m * weight for weight, u_label, u in group), label) for label, group in itertools.groupby(lbls, lambda x: x[1])]
88            max_score = max(lbls)[0]
89            max_lbls = [label for score, label in lbls if score >= max_score]
90           
91            # only change label if it is not already one of the
92            # preferred (maximal) labels
93            if labels[v] not in max_lbls:
94                labels[v] = random.choice(max_lbls)
95                scores[v] = max(0, max(scores[u] for u in neighbors if labels[u] == labels[v]) - delta)
96                stop = 0
97           
98        lblhistory.append([str(labels[key]) for key in sorted(labels.keys())])
99        # if stopping condition is satisfied (none of the labels switched color)
100        if stop:
101            break
102           
103    if results2items and not resultHistory2items:
104        add_results_to_items(G, lblhistory)
105       
106    if resultHistory2items:
107        add_history_to_items(G, lblhistory)
108   
109    print "iterations:", i
110    return labels
111
112def label_propagation(G, results2items=0, resultHistory2items=0, iterations=1000):
113    """Label propagation for community detection, Raghavan et al., 2007
114   
115    :param G: A Orange graph.
116    :type G: Orange.network.Graph
117   
118    :param results2items: Append a new feature result to items
119        (Orange.data.Table).
120    :type results2items: bool
121   
122    :param resultHistory2items: Append new features result to items
123        (Orange.data.Table) after each iteration of the algorithm.
124    :type resultHistory2items: bool
125   
126    :param iterations: The maximum number of iterations if there is no convergence.
127    :type iterations: int
128   
129    """
130   
131    def next_label(neighbors):
132        """Updating rule as described by Raghavan et al., 2007
133       
134        Return a list of possible node labels with equal probability.
135        """
136        lbls = sorted(labels[u] for u in neighbors)
137        lbls = [(len(list(c)), l) for l, c in itertools.groupby(lbls)]
138        m = max(lbls)[0]
139        return [l for c, l in lbls if c >= m]
140   
141    vertices = G.nodes()
142    labels = dict(zip(vertices, range(G.number_of_nodes()))) 
143    lblhistory = []
144    for i in range(iterations):
145        random.shuffle(vertices)
146        stop = 1
147        for v in vertices:
148            nbh = G.neighbors(v)
149            if len(nbh) == 0:
150                continue
151           
152            max_lbls = next_label(nbh)
153           
154            if labels[v] not in max_lbls:
155                stop = 0
156           
157            labels[v] = random.choice(max_lbls)
158           
159        lblhistory.append([str(labels[key]) for key in sorted(labels.keys())])
160        # if stopping condition might be satisfied, check it
161        # stop when no label would switch anymore
162        if stop:
163            for v in vertices:
164                nbh = G.neighbors(v)
165                if len(nbh) == 0: 
166                    continue
167                max_lbls = next_label(nbh)
168                if labels[v] not in max_lbls: 
169                    stop = 0
170                    break
171           
172            if stop: break
173               
174    if results2items and not resultHistory2items:
175        add_results_to_items(G, lblhistory)
176       
177    if resultHistory2items:
178        add_history_to_items(G, lblhistory)
179       
180    print "iterations:", i
181    return labels
Note: See TracBrowser for help on using the repository browser.