source: orange/Orange/doc/modules/reinforcement1.py @ 9671:a7b056375472

Revision 9671:a7b056375472, 527 bytes checked in by anze <anze.staric@…>, 2 years ago (diff)

Moved orange to Orange (part 2)

Line 
"""Minimal orngReinforcement example: teach an RLSarsa learner a two-state rule.

The learner is rewarded for answering 0 when shown state [0,0] and for
answering 1 when shown state [1,1]. Afterwards exploration and learning are
switched off and the learner's answer for each state is printed.
"""
import orngReinforcement

r = orngReinforcement.RLSarsa(2, 1)

# Start an episode in state [0,0]; the returned value is used as the
# learner's first answer.
ans = r.init([0, 0])

# If state is (0,0), act 0.
# Each pass scores the previous answer, then hands that reward to decide()
# together with the state the learner sees next.
for i in range(10):
    reward = 1 if ans == 0 else 0
    ans = r.decide(reward, [0, 0])

# If state is (1,1), act 1.
# NOTE(review): on the first pass `ans` is still the answer returned for
# state [0,0] by the loop above, yet it is scored against the (1,1) rule.
# Confirm against decide()'s contract whether that single sample is intended.
for i in range(10):
    reward = 1 if ans == 1 else 0
    ans = r.decide(reward, [1, 1])

r.epsilon = 0.0  # no random action
r.alpha = 0.0  # stop learning

# A single pre-formatted argument keeps the output identical under both the
# Python 2 print statement and the Python 3 print function.
print("in (0,0) do %s" % (r.decide(0, [0, 0]),))  # should output 0
print("in (1,1) do %s" % (r.decide(0, [1, 1]),))  # should output 1
Note: See TracBrowser for help on using the repository browser.