source: orange/Orange/network/readwrite.py @ 10469:a76928c0d05b

Revision 10469:a76928c0d05b, 11.5 KB checked in by Ales Erjavec <ales.erjavec@…>, 2 years ago (diff)

Changed 'import orangeom' to 'from Orange import orangeom' (fixes #1122).

Line 
1"""
2.. index:: reading and writing networks
3
4.. index::
5   single: network; reading and writing networks
6
7****************************
8Reading and writing networks
9****************************
10
11When using networks in Orange data mining suite, I advise you not to use
12NetworkX reading and writing methods.  Instead, use new methods provided in
13the :obj:`Orange.network.readwrite` module. If, for some reason, you have to
14use the original read / write methods, do not forget to cast the network (see
15Orange.network.readwrite._wrap method). 
16
17"""
18
19import os
20import os.path
21import warnings
22import itertools
23import tempfile
24import gzip
25
26import networkx as nx
27import networkx.readwrite.pajek as rwpajek
28import networkx.readwrite.gml as rwgml
29import networkx.readwrite.gpickle as rwgpickle
30
31import Orange
32import Orange.network
33from Orange import orangeom
34
35__all__ = ['read', 'write', 'read_gpickle', 'write_gpickle', 'read_pajek', 
36           'write_pajek', 'parse_pajek', 'generate_pajek', 'read_gml', 
37           'write_gml']
38
def _wrap(g):
    """Cast a NetworkX graph to the matching Orange.network graph class.

    :param g: graph to cast.

    Return ``g`` itself if it already is an instance of the matching Orange
    class (or is not a NetworkX graph at all); otherwise return a new Orange
    graph built from ``g`` that keeps ``g.name``.

    """
    # Order matters: nx.MultiDiGraph derives from both nx.MultiGraph and
    # nx.DiGraph, so the most specific class has to be tested first.
    # Otherwise a multi-digraph would match nx.DiGraph and be cast to a plain
    # DiGraph. nx.Graph is the common base class and must come last.
    for base, new in [(nx.MultiDiGraph, Orange.network.MultiDiGraph),
                      (nx.MultiGraph, Orange.network.MultiGraph),
                      (nx.DiGraph, Orange.network.DiGraph),
                      (nx.Graph, Orange.network.Graph)]:
        if isinstance(g, base):
            return g if isinstance(g, new) else new(g, name=g.name)
    return g
47
48def _add_doc(myclass, nxclass):
49    tmp = nxclass.__doc__.replace('nx.write', 'Orange.network.readwrite.write')
50    tmp = tmp.replace('nx.read', 'Orange.network.readwrite.read')
51    tmp = tmp.replace('nx', 'Orange.network.nx')
52    myclass.__doc__ += tmp
53
54def _is_string_like(obj): # from John Hunter, types-free version
55    """Check if obj is string."""
56    try:
57        obj + ''
58    except (TypeError, ValueError):
59        return False
60    return True
61
def _get_fh(path, mode='r'):
    """Return a file handle for the given path.

    :param path: a file name, or an already open file-like object.
    :param mode: mode used when ``path`` is a file name, default 'r'.

    File names ending in '.gz' or '.bz2' are opened with :obj:`gzip.open` or
    :obj:`bz2.BZ2File` respectively; any other name is opened with
    :obj:`open`. A file-like object (anything with a ``read`` or ``write``
    method) is returned unchanged, and ``mode`` is ignored for it.

    Raise :obj:`ValueError` if ``path`` is neither a string nor file-like.

    """
    if _is_string_like(path):
        if path.endswith('.gz'):
            import gzip
            return gzip.open(path, mode=mode)
        if path.endswith('.bz2'):
            import bz2
            return bz2.BZ2File(path, mode=mode)
        return open(path, mode=mode)
    # Accept write-only objects as well: this helper is also used to obtain
    # handles for writing, and such objects need not provide read().
    if hasattr(path, 'read') or hasattr(path, 'write'):
        return path
    raise ValueError('path must be a string or file handle')
84
def _make_str(t):
    """Return ``t`` unchanged if it is string-like, otherwise ``str(t)``."""
    return t if _is_string_like(t) else str(t)
89
def graph_to_table(G):
    """Build an Orange data table from the attribute dictionaries of nodes.

    Every attribute name found on any node becomes a column. The values are
    written to a temporary tab-delimited file, which is then loaded with
    :obj:`Orange.data.Table` and removed again.

    :param G: graph whose ``G.node`` maps nodes to attribute dictionaries.

    Return the table, or :obj:`None` if the graph has no nodes.

    """
    if G.number_of_nodes() == 0:
        # There is nothing to tabulate. This used to fail with an
        # UnboundLocalError because no table was ever created.
        return None

    def cell(value):
        # Render one value as a byte string that is safe inside a
        # tab-delimited row. Only unicode needs encoding; calling .encode()
        # on a byte string would trigger an implicit ASCII decode and fail
        # on non-ASCII data.
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        elif not isinstance(value, str):
            return str(value)
        return value.replace('\t', ' ')

    nodes = list(G.node.itervalues())
    features = list(set(itertools.chain.from_iterable(
        node.iterkeys() for node in nodes)))

    # Header row first, then one row per node; '?' marks a missing value.
    rows = [[cell(f) for f in features]]
    rows.extend([cell(node.get(f, '?')) for f in features] for node in nodes)

    fp = tempfile.NamedTemporaryFile('wt', suffix='.txt', delete=False)
    try:
        try:
            fp.write('\n'.join('\t'.join(row) for row in rows))
        finally:
            fp.close()
        return Orange.data.Table(fp.name)
    finally:
        # Remove the temporary file even if writing or loading failed.
        os.unlink(fp.name)
102
def read(path, encoding='UTF-8', auto_table=0):
    """Read graph in any of the supported file formats (.gpickle, .net, .gml,
    .txt.gz). The parser is chosen based on the file extension.

    :param path: Name of the file to read.
    :type path: string

    :param encoding: Encoding passed on to the .net and .gml readers,
        default 'UTF-8'.
    :type encoding: string

    :param auto_table: Passed on to the .net, .gml and .gpickle readers; it
        is not used for .txt.gz files.

    Return whatever the selected reader returns.

    Raise :obj:`OSError` if the file does not exist and :obj:`ValueError` if
    its extension is not supported.

    """

    if not os.path.isfile(path):
        raise OSError('File %s does not exist.' % path)

    root, ext = os.path.splitext(path)

    if ext == '.net':
        return read_pajek(path, encoding, auto_table=auto_table)

    if ext == '.gml':
        return read_gml(path, encoding, auto_table=auto_table)

    if ext == '.gpickle':
        return read_gpickle(path, auto_table=auto_table)

    if ext == '.gz':
        if path[-6:] == 'txt.gz':
            return read_txtgz(path)
        # Any other gzip file used to fall through and return None silently.
        raise ValueError('Compressed file %s is not supported; only txt.gz '
                         'edge lists can be read.' % path)

    raise ValueError('Extension %s is not supported.' % ext)
137
def write(G, path, encoding='UTF-8'):
    """Write graph in any of the supported file formats (.gpickle, .net, .gml).
    The file format is chosen based on the file extension.

    :param G: A Orange graph.
    :type G: Orange.network.Graph

    :param path: Name of the file to write.
    :type path: string

    :param encoding: Encoding used for .net files, default 'UTF-8'.
    :type encoding: string

    If ``G.items()`` or ``G.links()`` is not :obj:`None`, it is also saved
    next to the graph file as ``<root>_items.tab`` / ``<root>_links.tab``.

    Raise :obj:`ValueError` if the extension is not supported.

    """

    supported = ['.net', '.gml', '.gpickle']

    root, ext = os.path.splitext(path)
    if ext not in supported:
        raise ValueError('Extension %s is not supported. Use %s.' % (ext, ', '.join(supported)))

    if ext == '.net':
        write_pajek(G, path, encoding)
    elif ext == '.gml':
        write_gml(G, path)
    elif ext == '.gpickle':
        write_gpickle(G, path)

    # Save the attached tables, if any, next to the graph file.
    for accessor, suffix in (('items', '_items.tab'), ('links', '_links.tab')):
        if getattr(G, accessor)() is not None:
            getattr(G, accessor)().save(root + suffix)
171
def read_gpickle(path, auto_table=False):
    """Read a graph with NetworkX read_gpickle and wrap it to an Orange
    network.

    If ``auto_table`` is true, a table built from the node attributes is set
    as the items of the network.

    NOTE(review): gpickle files are presumably Python pickles; only load
    files that come from a trusted source.

    """

    network = _wrap(rwgpickle.read_gpickle(path))
    if not auto_table:
        return network
    network.set_items(graph_to_table(network))
    return network
181
182_add_doc(read_gpickle, rwgpickle.read_gpickle)
183
def write_gpickle(G, path):
    """Write graph ``G`` to ``path`` with NetworkX write_gpickle.

    This is a thin pass-through; both arguments are handed to NetworkX
    unchanged.

    """

    rwgpickle.write_gpickle(G, path)
190
191_add_doc(write_gpickle, rwgpickle.write_gpickle)
192
def read_pajek(path, encoding='UTF-8', project=False, auto_table=False):
    """A completely reimplemented method for reading Pajek files. Written in
    C++ for maximum performance.

    :param path: Name of the file to read.
    :type path: string

    :param encoding: Encoding of input text file, default 'UTF-8'. The value
        is currently not used by this function; the file is parsed by
        ``orangeom.GraphLayout().readPajek``.
    :type encoding: string

    :param project: Determines whether the input file is a Pajek project file,
        possibly containing multiple networks and other data. If :obj:`True`,
        a list of networks is returned instead of just a network. Default is
        :obj:`False`.
    :type project: boolean.

    :param auto_table: If true, the items table returned by the parser is set
        as the items of each network. Default is :obj:`False`.
    :type auto_table: boolean.

    Return the network (or a list of networks if project=:obj:`True`) of type
    :obj:`Orange.network.Graph` or :obj:`Orange.network.DiGraph`. A network
    is directed when the file defines at least one arc.


    Examples

    >>> G=Orange.network.nx.path_graph(4)
    >>> Orange.network.readwrite.write_pajek(G, "test.net")
    >>> G=Orange.network.readwrite.read_pajek("test.net")

    To create a Graph instead of a MultiGraph use

    >>> G1=Orange.network.Graph(G)

    References

    See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
    for format information.

    """

    parsed = orangeom.GraphLayout().readPajek(path, project)
    networks = parsed if project else [parsed]

    result = []
    for graphname, vertices, edges, arcs, items in networks:
        directed = len(arcs) > 0
        G = Orange.network.DiGraph() if directed else Orange.network.Graph()
        G.add_nodes_from(range(len(items)))

        # The edge weight is stored next to the other edge attributes.
        G.add_edges_from((u, v, dict(d, weight=w)) for u, v, w, d in edges)
        if directed:
            # In a directed network an undirected edge is represented by a
            # pair of opposite arcs; explicit arcs are added last.
            G.add_edges_from((v, u, dict(d, weight=w)) for u, v, w, d in edges)
            G.add_edges_from((u, v, dict(d, weight=w)) for u, v, w, d in arcs)

        if auto_table:
            G.set_items(items)

        # Copy the per-vertex data onto nodes 0, 1, 2, ... (zip stops at the
        # shorter of the two sequences).
        for i, vdata in zip(range(len(G.node)), vertices):
            G.node[i].update(vdata)
        G.name = graphname

        result.append(G)

    if not project:
        return result[0]

    return result
262
def write_pajek(G, path, encoding='UTF-8'):
    """A copy & paste of NetworkX's function with some bugs fixed (call the new
    generate_pajek).

    :param G: graph to write.
    :param path: file name or an open file handle.
    :param encoding: encoding of the written lines, default 'UTF-8'.

    A file opened here from a file name is closed before returning; a handle
    supplied by the caller is left open.

    """

    fh = _get_fh(path, 'wb')
    try:
        for line in generate_pajek(G):
            fh.write((line + '\n').encode(encoding))
    finally:
        # _get_fh returns the caller's own handle unchanged, so only close
        # the handle when it was opened here. Without closing, buffered data
        # might never reach the file.
        if fh is not path:
            fh.close()
273
274_add_doc(write_pajek, rwpajek.write_pajek)
275
def parse_pajek(lines):
    """Parse string in Pajek file format. See read_pajek for usage examples.

    :param lines: network data in Pajek file format, given either as a single
        string or as an iterable of lines. For backward compatibility the
        name of an existing file is accepted too and read directly.
    :type lines: string

    Return the network read by :obj:`read_pajek`.

    """

    if _is_string_like(lines):
        if os.path.isfile(lines):
            # Historical behaviour: the argument was always handed to
            # read_pajek, i.e. treated as a file name.
            return read_pajek(lines)
        text = lines
    else:
        text = '\n'.join(line.rstrip('\r\n') for line in lines)

    if not isinstance(text, bytes):
        text = text.encode('UTF-8')

    # read_pajek only reads from a file name, so spool the data to a
    # temporary .net file and remove it afterwards.
    fp = tempfile.NamedTemporaryFile('wb', suffix='.net', delete=False)
    try:
        try:
            fp.write(text)
        finally:
            fp.close()
        return read_pajek(fp.name)
    finally:
        os.unlink(fp.name)
285
286
def _quote_if_spaced(text):
    """Return text in double quotes (inner quotes escaped) if it has a space."""
    if " " in text:
        return "\"%s\"" % text.replace('"', r'\"')
    return text


def generate_pajek(G):
    """A copy & paste of NetworkX's function with some bugs fixed (generate
    one line per object: vertex, edge, arc. Do not add one per entry in data
    dictionary).

    Generate lines in Pajek graph format.

    :param G: A Orange graph.
    :type G: Orange.network.Graph

    Node labels and attribute values that contain a space are written in
    double quotes so that every line can be split back into its fields.

    References

    See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
    for format information.

    """

    name = 'NetworkX' if G.name == '' else G.name
    yield '*network %s' % name

    # write nodes with attributes
    yield '*vertices %s' % (G.order())
    nodes = G.nodes()
    # make dictionary mapping nodes to integers
    nodenumber = dict(zip(nodes, range(1, len(nodes) + 1)))
    for n in nodes:
        na = G.node.get(n, {})
        x = na.get('x', 0.0)
        y = na.get('y', 0.0)
        # an explicit 'id' attribute overrides the sequential number
        node_id = int(na.get('id', nodenumber[n]))
        nodenumber[n] = node_id
        shape = na.get('shape', 'ellipse')
        s = ' '.join([_make_str(node_id), _quote_if_spaced(_make_str(n)),
                      _make_str(x), _make_str(y), _make_str(shape)])
        for key, value in na.items():
            if key != 'x' and key != 'y':
                s += ' %s %s' % (key, _quote_if_spaced('%s' % (value,)))
        yield s

    # write edges with attributes
    if G.is_directed():
        yield '*arcs'
    else:
        yield '*edges'
    for u, v, edgedata in G.edges(data=True):
        d = edgedata.copy()
        value = d.pop('weight', 1.0) # use 1 as default edge value
        s = ' '.join(map(_make_str, (nodenumber[u], nodenumber[v], value)))
        for key, attr in d.items():
            if not _is_string_like(attr):
                attr = repr(attr)
            # add quotes to any values with a blank space
            s += ' %s %s' % (key, _quote_if_spaced(attr))
        yield s
345       
346
347#_add_doc(generate_pajek, rwpajek.generate_pajek)
348       
def read_gml(path, encoding='latin-1', relabel=False, auto_table=False):
    """Read a graph with NetworkX read_gml and wrap it to an Orange network.

    ``path``, ``encoding`` and ``relabel`` are passed to NetworkX unchanged.
    If ``auto_table`` is true, a table built from the node attributes is set
    as the items of the network.

    """
    network = _wrap(rwgml.read_gml(path, encoding, relabel))
    if not auto_table:
        return network
    network.set_items(graph_to_table(network))
    return network
357
358_add_doc(read_gml, rwgml.read_gml)
359
def write_gml(G, path):
    """Write graph ``G`` to ``path`` with NetworkX write_gml.

    This is a thin pass-through; both arguments are handed to NetworkX
    unchanged.

    """

    rwgml.write_gml(G, path)
366
def read_txtgz(path):
    """Read a gzip-compressed, tab-separated edge list.

    Lines starting with '#' are comments. If the comments mention a
    "directed graph" (but not merely an "undirected graph") the result is an
    :obj:`Orange.network.DiGraph`, otherwise an :obj:`Orange.network.Graph`.
    Every other line that consists of exactly two tab-separated fields is
    read as an edge between two integer node ids; all remaining lines are
    ignored.

    :param path: Name of the file to read.
    :type path: string

    """
    import re  # only needed here; imported lazily like gzip/bz2 in _get_fh

    f = gzip.open(path, 'rb')
    try:
        content = f.read()
    finally:
        f.close()

    comments = []
    rows = []
    for line in content.split('\n'):
        line = line.strip()
        if line.startswith('#'):
            comments.append(line)
        else:
            rows.append(line)

    # A plain substring test for "directed graph" also matches "Undirected
    # graph", so require that no letter precedes the word.
    header = '\n'.join(comments).lower()
    if re.search(r'(?<![a-z])directed graph', header):
        G = Orange.network.DiGraph()
    else:
        G = Orange.network.Graph()

    edges = []
    for row in rows:
        fields = row.split('\t')
        if len(fields) == 2:
            edges.append([int(node) for node in fields])
    G.add_edges_from(edges)

    return G
384
385_add_doc(write_gml, rwgml.write_gml)
Note: See TracBrowser for help on using the repository browser.