# source:orange/orange/Orange/multilabel/lp.py@9505:4b798678cd3d

Revision 9505:4b798678cd3d, 4.8 KB checked in by matija <matija.polajnar@…>, 2 years ago (diff)

Merge in the (heavily modified) MLC code from GSOC 2011 (modules, documentation, evaluation code, regression test). Widgets will be merged in a little bit later, which will finally close ticket #992.

Line
1"""
2.. index:: LabelPowerset Learner
3
4***************************************
5LabelPowerset Learner
6***************************************
7
LabelPowerset Classification is another transformation method for multi-label classification.
It treats each distinct set of labels that occurs in the multi-label data as a
single class. Thus it learns a classification problem :math:`H:X \\rightarrow \\mathbb{P}(L)`, where
:math:`\\mathbb{P}(L)` is the power set of L.
For more information, see G. Tsoumakas and I. Katakis. Multi-label classification: An overview.
International Journal of Data Warehousing and Mining, 3(3):1-13, 2007.
15
16.. index:: LabelPowerset Learner
17.. autoclass:: Orange.multilabel.LabelPowersetLearner
18   :members:
19   :show-inheritance:
20
   :param instances: a table of instances.
   :type instances: :class:`Orange.data.Table`

   :param base_learner: the single-label learner trained on the transformed data; the default learner is BayesLearner
   :type base_learner: :class:`Orange.classification.Learner`
26
27.. index:: LabelPowerset Classifier
28.. autoclass:: Orange.multilabel.LabelPowersetClassifier
29   :members:
30   :show-inheritance:
31
32Examples
33========
34
The following example demonstrates a straightforward invocation of
this algorithm (:download:`mlc-classify.py <code/mlc-classify.py>`, uses
:download:`emotions.tab <code/emotions.tab>`):
38
39.. literalinclude:: code/mlc-classify.py
40   :lines: 6, 19-21
41
42"""
43
44import Orange
45from Orange.core import BayesLearner as _BayesLearner
46import multibase as _multibase
47
def get_label_bitstream(e):
    """
    Encode the label set of an instance as a single string.

    The ``value`` of every entry returned by ``e.get_classes()`` is
    concatenated, in order, without a separator.

    :param e: an instance exposing ``get_classes()``.
    :rtype: str
    """
    # presumably each value is '0' or '1' (the classifier tests for '1') -- confirm
    label_values = [label.value for label in e.get_classes()]
    return ''.join(label_values)
50
def transform_to_powerset(instances):
    """
    Convert a multi-label table into a single-label table.

    Every instance keeps its attribute values; its label set is replaced
    by one value of a new discrete class variable named ``"label"``. That
    value is the string produced by :func:`get_label_bitstream`.

    :param instances: the multi-label data to transform.
    :returns: a new table over the original attributes and the new
        ``"label"`` class variable, with one row per input instance.
    """
    new_class = Orange.data.variable.Discrete("label")

    # Register every distinct label set as a value of the new class variable.
    # Without this the variable has no values and the rows built below cannot
    # be assigned their class. Values are added in first-seen order.
    seen = set()
    for e in instances:
        class_value = get_label_bitstream(e)
        if class_value not in seen:
            seen.add(class_value)
            new_class.add_value(class_value)

    new_domain = Orange.data.Domain(instances.domain.attributes, new_class)

    #build the instances
    new_table = Orange.data.Table(new_domain)
    for e in instances:
        new_row = Orange.data.Instance(
          new_domain,
          [e[a].value for a in instances.domain.attributes] +
          [get_label_bitstream(e)])

        new_table.append(new_row)

    return new_table
71
class LabelPowersetLearner(_multibase.MultiLabelLearner):
    """
    Class that implements the LabelPowerset (LP) method.

    The learner can be used in two ways: constructed with ``instances`` it
    trains immediately and returns a :class:`LabelPowersetClassifier`;
    constructed without data it returns the learner, which is trained later
    by calling it with a table.
    """
    def __new__(cls, instances = None, base_learner = None, weight_id = 0, **argkw):
        """
        :param instances: optional training data; when given, a trained
            classifier is returned instead of the learner.
        :param base_learner: learner applied to the transformed
            (single-label) data; naive Bayes is used when omitted.
        :param weight_id: forwarded to the resulting classifier.
        """
        self = _multibase.MultiLabelLearner.__new__(cls, **argkw)

        # Keep the configured learner on the instance so it is not lost when
        # training is deferred to a later __call__.
        if base_learner:
            self.base_learner = base_learner

        # Compare against None: an empty table is still "data was given" and
        # must not silently yield an untrained learner.
        if instances is not None:
            self.__init__(**argkw)
            return self.__call__(instances, base_learner, weight_id)
        else:
            return self

    def __call__(self, instances, base_learner = None, weight_id = 0, **kwds):
        """
        Train on ``instances`` and return a :class:`LabelPowersetClassifier`.

        :param instances: multi-label training data.
        :param base_learner: overrides the learner given at construction;
            when neither is set, naive Bayes is used.
        :param weight_id: stored on the returned classifier; it is not
            passed to the base learner.
        :raises TypeError: if ``instances`` is not a multi-label data set.
        """
        if not Orange.multilabel.is_multilabel(instances):
            raise TypeError("The given data set is not a multi-label data set.")

        self.__dict__.update(kwds)

        new_table = transform_to_powerset(instances)

        # Precedence: explicit argument, then the learner configured at
        # construction time, then the naive Bayes default.
        if not base_learner:
            base_learner = getattr(self, "base_learner", None) or _BayesLearner
        classifier = base_learner(new_table)

        #Learn from the given table of data instances.
        return LabelPowersetClassifier(instances = instances,
                                       classifier = classifier,
                                       weight_id = weight_id)
101
class LabelPowersetClassifier(_multibase.MultiLabelClassifier):
    """
    Classifier produced by the LabelPowerset method.

    It reads ``self.classifier`` (the single-label model) and
    ``self.instances`` (whose domain supplies the label variables).
    """
    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue):
        """
        Predict the labels of ``instance``.

        The wrapped classifier predicts one string; its characters are
        paired, in order, with the label variables of the training domain.

        :rtype: a list of :class:`Orange.data.Value`, a list of :class:`Orange.statistics.distribution.Distribution`, or a tuple with both
        """
        predicted = self.classifier(instance)
        label_vars = self.instances.domain.class_vars
        bits = predicted.value

        labels = [Orange.data.Value(lvar, bit)
                  for bit, lvar in zip(bits, label_vars)]
        if result_type == Orange.classification.Classifier.GetValue:
            return labels

        # The distributions are degenerate: probability 1.0 for a label whose
        # bit is '1', 0.0 otherwise.
        disc = []
        for bit, lvar in zip(bits, label_vars):
            p = float(bit == '1')
            dist = Orange.statistics.distribution.Discrete([1-p, p])
            dist.variable = lvar
            disc.append(dist)

        if result_type == Orange.classification.Classifier.GetProbabilities:
            return disc
        return labels,disc
125
126#########################################################################################
127# Test the code, run from DOS prompt
128# assume the data file is in proper directory
129
if __name__ == "__main__":
    # Demo: train on emotions.tab and print the predicted labels and
    # distributions for the first ten instances.
    emotions = Orange.data.Table("emotions.tab")

    lp_classifier = Orange.multilabel.LabelPowersetLearner(emotions)
    get_both = Orange.classification.Classifier.GetBoth
    for index in range(10):
        labels, dists = lp_classifier(emotions[index], get_both)
        # Single formatted string: same output under Python 2 and 3.
        print("%s %s" % (labels, dists))
Note: See TracBrowser for help on using the repository browser.