source: orange/Orange/multilabel/mulan.py @ 9671:a7b056375472

Revision 9671:a7b056375472, 1.6 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Moved orange to Orange (part 2)

Line 
1import Orange
2import xml.dom.minidom
3from xml.dom.minidom import Node
4
def trans_mulan_data(xml_name, arff_name, create_on_new=Orange.data.variable.Variable.MakeStatus.Incompatible, **kwargs):
    """Load a Mulan multi-label data set as an Orange data table.

    The label names are read from the ``name`` attribute of every
    ``<label>`` element in the XML file.  The ARFF file is loaded, and its
    variables are then split into ordinary features and label variables; the
    resulting domain has no single class variable and carries the label
    variables as ``class_vars``.

    :param xml_name: path of a text file in XML format, specifying the
        labels and any hierarchical relationship among them; see
        `Mulan data format <http://mulan.sourceforge.net/format.html>`_.
    :type xml_name: string

    :param arff_name: path of a text file in the
        `ARFF format of Weka <http://weka.wikispaces.com/ARFF>`_.
    :type arff_name: string

    :param create_on_new: passed unchanged as the second argument to
        :obj:`Orange.data.io.loadARFF_Weka`.

    :param kwargs: accepted for call compatibility; not used by this
        function.

    :rtype: :class:`Orange.data.Table`
    """
    # Load the XML file and collect the declared label names.  A set is used
    # because every variable of the ARFF domain is tested against it below.
    doc = xml.dom.minidom.parse(xml_name)
    label_names = set(str(node.getAttribute("name"))
                      for node in doc.getElementsByTagName("label"))

    # Load the ARFF file.
    arff_table = Orange.data.io.loadARFF_Weka(arff_name, create_on_new)

    # Partition the variables in a single pass, keeping their original order:
    # variables named in the XML file become labels, all others are features.
    features = []
    class_vars = []
    for var in arff_table.domain.variables:
        if var.name in label_names:
            class_vars.append(var)
        else:
            features.append(var)

    # No single class variable (None); the labels go into class_vars instead.
    domain = Orange.data.Domain(features, None, class_vars=class_vars)

    return arff_table.translate(domain)
36
37##############################################################################
38# Test the code, run from DOS prompt
39# assume the data file is in proper directory
40
if __name__ == "__main__":
    # Smoke test: convert the "emotions" Mulan data set.  The paths are
    # relative to the current working directory.
    table = trans_mulan_data("../../doc/datasets/emotions.xml",
                             "../../doc/datasets/emotions.arff")

    # Show at most the first ten instances; bounding the loop by len(table)
    # avoids indexing past the end of a table with fewer than ten rows.
    for i in range(min(10, len(table))):
        # A parenthesised single argument prints the same under the Python 2
        # print statement and the Python 3 print function.
        print(table[i])

    table.save("emotions.tab")
Note: See TracBrowser for help on using the repository browser.