source: orange/Orange/multilabel/br.py @ 10502:6b593a8cd5a0

Revision 10502:6b593a8cd5a0, 4.9 KB checked in by Matija Polajnar <matija.polajnar@…>, 2 years ago (diff)

Make multi-label warning and errors more clear on what a 'multi-label dataset' is from our perspective; Miha warned me students had troubles with this.

Line 
1"""
2.. index:: Binary Relevance Learner
3
4***************************************
5Binary Relevance Learner
6***************************************
7
8The most basic problem transformation method for multi-label classification
9is the Binary Relevance method.
10It learns :math:`|L|` binary classifiers :math:`H_l:X \\rightarrow \{l, \\neg l\}`,
11one for each different label :math:`l` in :math:`L`.
12It transforms the original data set into :math:`|L|` data sets :math:`D_l`
13that contain all examples of the original data set, labelled as
14:math:`l` if the labels of the original example contained :math:`l` and
15as :math:`\\neg l` otherwise. This is the same approach that is used
16to solve a single-label multi-class problem with binary classifiers.
17For more information, see G. Tsoumakas and I. Katakis. `Multi-label classification: An overview
18<http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.9401&rep=rep1&type=pdf>`_.
19International Journal of Data Warehousing and Mining, 3(3):1-13, 2007.
20
21Note that a copy of the table is made in RAM for each label to enable construction of
22a classifier. Due to technical limitations, that is currently unavoidable and
23should be remedied in Orange 3.
24
25.. index:: Binary Relevance Learner
26.. autoclass:: Orange.multilabel.BinaryRelevanceLearner
27   :members:
28   :show-inheritance:
29 
30   :param instances: a table of instances.
31   :type instances: :class:`Orange.data.Table`
32     
33   :param base_learner: the binary learner, the default learner is
34                        :class:`Orange.classification.bayes.NaiveLearner`.
35   :type base_learner: :class:`Orange.classification.Learner`
36
37.. index:: Binary Relevance Classifier
38.. autoclass:: Orange.multilabel.BinaryRelevanceClassifier
39   :members:
40   :show-inheritance:
41
42   
43Examples
44========
45
46The following example demonstrates a straightforward invocation of
47this algorithm (:download:`mlc-classify.py <code/mlc-classify.py>`):
48
49.. literalinclude:: code/mlc-classify.py
50   :lines: 6, 15-17
51
52"""
53
54import Orange
55from Orange.classification.bayes import NaiveLearner as _BayesLearner
56import multibase as _multibase
57
class BinaryRelevanceLearner(_multibase.MultiLabelLearner):
    """
    Learner implementing the Binary Relevance (BR) method.

    One classifier is built with ``base_learner`` for every class variable
    of the training table; together they form a
    :class:`BinaryRelevanceClassifier`.
    """
    def __new__(cls, instances = None, base_learner = None, weight_id = 0, **argkw):
        """
        Create the learner and, when ``instances`` is given (and truthy),
        train on it right away and return the resulting classifier instead
        of the learner.

        ``base_learner`` falls back to the naive Bayes learner when it is
        omitted (or falsy).
        """
        learner = _multibase.MultiLabelLearner.__new__(cls, **argkw)
        learner.base_learner = base_learner if base_learner else _BayesLearner

        # No data supplied: hand back the configured learner itself.
        if not instances:
            return learner

        # Data supplied: initialise explicitly, then train immediately.
        learner.__init__(**argkw)
        return learner.__call__(instances, weight_id)

    def __call__(self, instances, weight_id = 0, **kwds):
        """
        Train one classifier per class variable of ``instances``.

        Extra keyword arguments are stored as attributes on the learner.

        :raises TypeError: if ``Orange.multilabel.is_multilabel`` rejects
            the given table.
        :rtype: :class:`BinaryRelevanceClassifier`
        """
        if not Orange.multilabel.is_multilabel(instances):
            raise TypeError("The given data set is not a multi-label data set"
                            " with class values 0 and 1.")

        self.__dict__.update(kwds)

        source_domain = instances.domain
        features = source_domain.attributes

        # For every label: build a single-class domain, convert the table to
        # it and train the base learner on the converted table.
        # NOTE(review): weight_id is forwarded to the classifier below but is
        # not passed to base_learner here -- confirm this is intended.
        per_label_classifiers = [
            self.base_learner(
                Orange.data.Table(Orange.data.Domain(features, label_var),
                                  instances))
            for label_var in source_domain.class_vars
        ]

        return BinaryRelevanceClassifier(instances = instances,
                                         classifiers = per_label_classifiers,
                                         weight_id = weight_id)
97
class BinaryRelevanceClassifier(_multibase.MultiLabelClassifier):
    """
    Classifier that combines the per-label classifiers built by
    :class:`BinaryRelevanceLearner`.

    All keyword arguments given to the constructor are stored as attributes;
    ``classifiers`` (a sequence with one classifier per label) is the only
    one read when classifying.
    """
    def __init__(self, **kwds):
        self.__dict__.update(kwds)

    def __call__(self, instance, result_type=Orange.classification.Classifier.GetValue):
        """
        Classify ``instance`` with every per-label classifier.

        :param instance: the instance to classify.
        :param result_type: ``GetValue`` returns only the predicted values,
            ``GetProbabilities`` only the distributions; any other value
            returns both.
        :rtype: a list of :class:`Orange.data.Value`, a list of :class:`Orange.statistics.distribution.Distribution`, or a tuple with both
        """
        labels = []
        dists = []

        # Each classifier is always asked for both the value and the
        # distribution; the requested part is selected afterwards.
        for c in self.classifiers:
            v, p = c(instance, Orange.classification.Classifier.GetBoth)
            labels.append(v)
            dists.append(p)

        if result_type == Orange.classification.Classifier.GetValue:
            return labels
        if result_type == Orange.classification.Classifier.GetProbabilities:
            return dists
        return labels, dists
121       
122#########################################################################################
123# A quick test/example.
124
if __name__ == "__main__":
    # Train a binary relevance classifier with kNN as the base learner.
    emotions = Orange.data.Table("emotions.tab")
    br_classifier = Orange.multilabel.BinaryRelevanceLearner(
        emotions, Orange.classification.knn.kNNLearner)

    # Print predicted labels and distributions for the first ten instances.
    get_both = Orange.classification.Classifier.GetBoth
    for row in range(10):
        labels, dists = br_classifier(emotions[row], get_both)
        print("%s %s" % (labels, dists))
Note: See TracBrowser for help on using the repository browser.