summary refs log tree commit diff
diff options
context:
space:
mode:
author Loic Guegan <manzerbredes@mailbox.org> 2022-11-01 17:30:25 +0100
committer Loic Guegan <manzerbredes@mailbox.org> 2022-11-01 17:30:25 +0100
commit 451856be1b2b309490fe9c5a63834496f7f36efa (patch)
tree 69a43d10fa90995e80365a25712e46d971bbdfc2
parent 69c5d709a459abc9e99c04de3d45e719381838bd (diff)
Minor changes
-rwxr-xr-x qlearning.py 29
1 files changed, 25 insertions, 4 deletions
diff --git a/qlearning.py b/qlearning.py
index e35d778..60775e3 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -38,7 +38,12 @@ def isWall(h,game):
return(True)
return(False)
+
+last_state=None
+last_action=None
def event_handler(game,event):
+ global last_state,last_action
+
h=game.snake[0]
left=(h[0]-1,h[1])
right=(h[0]+1,h[1])
@@ -70,14 +75,30 @@ def event_handler(game,event):
# This come from me I do not now if it is the best way to identify a state
state=2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
-
+ # Choose an action
if np.max(qtable[state]) > 0:
action = np.argmax(qtable[state])
else:
- action=random.choice((12,3,6,9))
-
- game.direction=action
+ action=random.choice((0,1,2,3))
+
+ # Update current state Q
+ if last_state != None:
+ qtable[last_state,last_action]=qtable[last_state,last_action]+0.5*(reward+0.5*qtable[state])
+ last_state=state
+ last_action=action
+
+ # Apply the action
+ snake_action=12
+ if action==1:
+ snake_action=3
+ elif action==2:
+ snake_action=6
+ elif action==3:
+ snake_action=9
+ game.direction=snake_action
for i in range(0,10):
+ last_state=None
+ last_action=None
score=game.run(event_handler=event_handler)
print("Game ended with "+str(score))
\ No newline at end of file