Orange Forum • View topic - Building a classifier and saving it for later.

Building a classifier and saving it for later.

A place to ask questions about methods in Orange and how they are used and other general support.

Building a classifier and saving it for later.

Postby David T » Fri May 02, 2008 4:30

Hello all,

I've been looking at the Orange project a bit here and there for the past few months, related to a project I want to complete with it.

I was curious about something, though, that I haven't found an answer to anywhere.

Let's say I have a set of training data (a CSV file or whatever) and I train a classifier in Orange. I would like to save this classifier, so that I can train it once on the training data and then, as more data comes in, check the new data against the classifier to come up with the class label. Something like streaming data, even though it won't really be streaming.

I hope it is clear what I'd like to do. The goal is to be able to train now and classify later.

Thanks,

David

Save for Maxent Classifier

Postby Guest » Sat Jun 28, 2008 23:37

Hi David,

Some time ago I changed the Maxent classifier, adding a save option, since the original maxent toolkit offers this feature. If I am not wrong, it worked nicely. See the code of the changed module below:

I hope this helps.

Elena

Changed orngMaxent.py with save method:

#! /usr/bin/env python
# vi:ts=4:tw=78:shiftwidth=4:expandtab
# vim600:fdm=marker
#
# orngMaxent.py - Python interface of the C++ MaxEnt for Orange lib
#
# Copyright (C) 2004 by Zhang Le <ejoy@users.sourceforge.net>
# Begin : 01-Nov-2004
# Last Change : 11-Jan-2005.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program.
#
# Brief usage (see unittests below):
# // load data
# data = orange.ExampleTable("voting")
#
# // build a learner, specify training data and training parameters
# classifier = MaxentLearner(data, iters = 10)
#
# // do prediction on the ith data
# // way 1: return the most possible class label
# c = classifier(data[i])
#
# // way 2: return probability distribution for all class labels
# // p[0] is the probability of the first class in data.domain.classVar.values
# // p[1] is the probability of the second class, and so on
# p = classifier(data[i], orange.GetProbabilities)
#
# // way 3: return a tuple of the most possible class label and its probability
# r = classifier(data[i], orange.GetBoth)
#
# // Finally, if you want verbose message during training, just call set_verbose:
# set_verbose(1)
#
# // modified:
# allows passing a list of features (len(features) = number of data rows)
# replacing the attributes as features (default)
# classifier = MaxentLearner(data, features = featurelist)
#
# added: save support


# The maxent toolkit bindings are mandatory for this module: without them we
# report where to get them on stderr and terminate with exit status -1.
try:
    from maxent import *
except ImportError:
    import sys
    missing_msg = 'maxent module not found, get it from homepages.inf.ed.ac.uk/s0450736/maxent_toolkit.html'
    sys.stderr.write(missing_msg + '\n')
    sys.exit(-1)

import orange

# extract features from an orange example
# return a list of features
def extract_features(ex, features=None):
    """Return the list of maxent features describing the example ``ex``.

    ex       -- an example exposing ``ex.domain.attributes``, ``len(ex)`` and
                positional indexing ``ex[i]``
    features -- optional customized feature list

    When ``features`` is non-empty and has exactly ``len(ex)`` entries, that
    very list object is returned untouched.  In every other case one
    ``'name=value'`` string is produced per attribute of the example's domain,
    pairing the attribute name with the example's value at that position.
    """
    # Customized feature list supplied and of matching length: use it as is.
    if features and len(features) == len(ex):
        return features
    # Default: derive the features from the attributes of the example.
    return ['%s=%s' % (attr.name, ex[pos])
            for pos, attr in enumerate(ex.domain.attributes)]

def MaxentLearner(examples=None, features=None, **kwds):
    """Create a maximum entropy learner and, if data is given, train it.

    examples -- optional training data; when supplied (and non-empty) the
                learner is trained right away
    features -- optional customized feature list, handed to the learner
                together with ``examples``; it is not used when no examples
                are given (pass it when calling the returned learner instead)
    **kwds   -- forwarded unchanged to the MaxentLearnerClass constructor
                (name, iters, method, gaussian)

    Returns the MaxentClassifier produced by training when ``examples`` is
    given, otherwise the untrained MaxentLearnerClass instance, which can be
    called with the training data later.
    """
    learner = MaxentLearnerClass(**kwds)
    if examples:
        print("MaxentLearner(examples)")
        # ``features`` must be passed by keyword: the second positional
        # parameter of MaxentLearnerClass.__call__ is ``weight``, so a
        # positional pass silently dropped the feature list.
        return learner(examples, features=features)
    print("MaxentLearner()")
    # No data yet: return the learner itself.  Calling it here would treat
    # ``features`` as the training data.
    return learner

class MaxentLearnerClass:
    """Learner that trains a maxent ``MaxentModel`` on Orange examples.

    Calling an instance with training data builds a model from the examples
    and returns a MaxentClassifier wrapping that model.
    """

    def __init__(self, name='Maximum Entropy Learner',
                 iters = 15, method = 'lbfgs', gaussian = 0.0):
        """Remember the training parameters.

        name     -- descriptive name of the learner
        iters    -- first argument passed to ``MaxentModel.train``
        method   -- training method, either 'lbfgs' or 'gis'
        gaussian -- third argument passed to ``MaxentModel.train``
        """
        print("MaxentLearnerClass.__init__()")
        self.name = name
        self.iters = iters
        assert method == 'lbfgs' or method == 'gis'
        self.method = method
        self.gaussian = gaussian
        # Model used by load(); replaced by the most recently trained model
        # each time the learner is called.
        self.m = MaxentModel()

    def __call__(self, data, weight=None, features=None):
        """Train a model on ``data`` and return a MaxentClassifier for it.

        data     -- iterable of examples providing ``getclass()``; its
                    ``domain`` attribute is stored in the classifier
        weight   -- accepted but ignored
        features -- optional customized feature list, forwarded to
                    extract_features for every example and stored in the
                    classifier
        """
        print("MaxentLearnerClass.__call__()")
        # Train a fresh model on every call.  Reusing one model object made
        # every classifier returned by this learner share it, so training
        # again also changed the classifiers returned earlier.
        model = MaxentModel()
        model.begin_add_event()
        for ex in data:
            model.add_event(extract_features(ex, features), ex.getclass().value)
        model.end_add_event()
        model.train(self.iters, self.method, self.gaussian)
        self.m = model
        return MaxentClassifier(model = model, domain = data.domain, features = features)

    def load(self, filename):
        """Load a model from ``filename`` into ``self.m``."""
        # The trace message used to name MaxentClassifier although this
        # method belongs to the learner.
        print("MaxentLearnerClass.load()")
        self.m.load(filename)

class MaxentClassifier:
    """Classifier wrapping a trained maxent model.

    Every keyword argument given to the constructor becomes an instance
    attribute.  The methods rely on ``model`` (the maxent model) and
    ``domain`` (whose ``classVar`` describes the class labels).
    """

    def __init__(self, **kwds):
        print("MaxentClassifier.__init__()")
        # The keyword arguments become the attribute dictionary of the instance.
        self.__dict__ = kwds

    def __call__(self, example, result_type = orange.GetValue, features=None):
        """Classify ``example``.

        example     -- the example to classify
        result_type -- selects the kind of answer:
                       orange.GetValue         the predicted class as an
                                               orange.Value
                       orange.GetProbabilities a list of probabilities in the
                                               order of domain.classVar.values
                       orange.GetBoth          a tuple built from the first
                                               entry returned by eval_all:
                                               (class as orange.Value,
                                               its probability)
        features    -- optional customized feature list, forwarded to
                       extract_features

        Returns None when the model yields no result or when ``result_type``
        is none of the three values above.
        """
        print("MaxentClassifier.__call__()")
        class_var = self.domain.classVar
        if result_type == orange.GetValue:
            return orange.Value(class_var, self.model.predict(extract_features(example, features)))

        # Map every class label to its position so the probabilities can be
        # reported in the order of the class variable's values.
        class_map = dict((label, pos) for pos, label in enumerate(class_var.values))
        result = self.model.eval_all(extract_features(example, features))
        if len(result) == 0:
            return None
        if result_type == orange.GetProbabilities:
            # Size the list by the number of class values, not by the number
            # of entries in ``result``: positions come from the class
            # variable, so a shorter result would index past the end.
            # Classes missing from the result keep probability 0.0.
            probabilities = [0.0] * len(class_var.values)
            for label, prob in result:
                probabilities[class_map[label]] = prob
            return probabilities
        if result_type == orange.GetBoth:
            best_label, best_prob = result[0][0], result[0][1]
            return (orange.Value(class_var, best_label), best_prob)
        return None

    def save(self, filename):
        """Save the wrapped model to ``filename`` (added to the original module)."""
        print("MaxentClassifier.save()")
        self.model.save(filename)

if __name__ == '__main__':
    import os
    import unittest

    # Hard-coded directory holding the Orange sample data sets.
    data_path = r"I:\Python\Python25\Lib\site-packages\orange\doc\datasets"

    class TestOrngMaxent(unittest.TestCase):
        # Trains a classifier on the "voting" data set before every test and
        # checks its answers for the first five examples.

        def setUp(self):
            set_verbose(1)
            voting_file = os.path.join(data_path, "voting.tab")
            self.data = orange.ExampleTable(voting_file)
            self.classifier = MaxentLearner(self.data, iters = 10)

        def test_predict_class(self):
            # Way 1: the classifier returns the class label only.
            for row in range(5):
                predicted = self.classifier(self.data[row])
                print("original %s classified as %s"
                      % (self.data[row].getclass(), predicted))
                self.assertEqual(self.data[row].getclass(), predicted)

        def test_predict_prob(self):
            # Way 2: the classifier returns the probability of every class.
            print("Possible classes: %s" % (self.data.domain.classVar.values,))
            print("Probabilities for democrats:")

            for row in range(5):
                probs = self.classifier(self.data[row], orange.GetProbabilities)
                print("%d: %5.3f (originally %s)"
                      % (row+1, probs[1], self.data[row].getclass()))
                if self.data[row].getclass() == 'democrat':
                    expected = 1.0
                else:
                    expected = 0.0
                self.assertAlmostEqual(probs[1], expected, 2)

        def test_predict_both(self):
            # Way 3: the classifier returns a (label, probability) tuple.
            for row in range(5):
                both = self.classifier(self.data[row], orange.GetBoth)
                self.assertEqual(both[0], self.data[row].getclass())
                self.assertAlmostEqual(both[1], 1.0, 2)

    print('running unittest...')
    unittest.main()

Postby Guest » Sat Jun 28, 2008 23:41

Oops, all the indents are gone.
But if you just add this method to the MaxentClassifier class, that should do the trick.

def save(self, filename):
    """Write the model held in ``self.model`` to ``filename``."""
    # SK added
    print("MaxentClassifier.save()")
    model = self.model
    model.save(filename)

Elena


Return to Questions & Support