diff options
Diffstat (limited to 'qlearning.py')
| -rwxr-xr-x | qlearning.py | 45 |
1 files changed, 29 insertions, 16 deletions
diff --git a/qlearning.py b/qlearning.py
index 492039d..cbb90db 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-import sys,random
+import sys,random,os
 import numpy as np
 
 # Import snake game
@@ -31,7 +31,7 @@
 
 qtable=np.zeros((4096, 4))
 
-game=Snake()
+game=Snake(length=4,fps=200)
 
 def isWall(h,game):
     if h[0]<0 or h[1]<0 or h[0] >= game.grid_width or h[1] >= game.grid_height:
@@ -41,8 +41,9 @@ def isWall(h,game):
 
 last_state=None
 last_action=None
+attempt=0
 def event_handler(game,event):
-    global last_state,last_action
+    global last_state,last_action,attempt
 
     h=game.snake[0]
     left=(h[0]-1,h[1])
@@ -56,10 +57,10 @@ def event_handler(game,event):
     snake_go_down=(game.direction==6)
     snake_go_left=(game.direction==9)
 
-    apple_up=(up==a)
-    apple_right=(right==a)
-    apple_down=(down==a)
-    apple_left=(left==a)
+    apple_up=(a[1]<h[1])
+    apple_right=(a[0]>h[0])
+    apple_down=(a[1]>h[1])
+    apple_left=(a[0]<h[0])
 
     obstacle_up=(up in game.snake or isWall(up, game))
     obstacle_right=(right in game.snake or isWall(right, game))
@@ -67,24 +68,32 @@ def event_handler(game,event):
     obstacle_left=(left in game.snake or isWall(left, game))
 
     reward=0
-    if event==1:
-        reward=1
-    elif event==-1:
+    if event==0:
+        attempt+=1
+    if event==-1:
         reward=-10
-    
+        attempt=0
+    elif event==1:
+        reward=1
+        attempt=0
+    # Avoid infinite loop
+    if attempt>3000:
+        reward=-1
+        attempt=0
     # This come from me I do not now if it is the best way to identify a state
     state=2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
 
     # Choose an action
     action=random.choice((0,1,2,3))
     if np.max(qtable[state]) > 0:
-        action = np.argmax(qtable[state])
-    
-    print(np.max(qtable[state]))
+        #qactions=qtable[state]
+        #options=np.flatnonzero(qactions == np.max(qactions)) # Since Q value might be equals for several actions
+        #action = random.choice(options)
+        action=np.argmax(qtable[state])
 
     # Update current state Q
     if last_state != None:
-        qtable[last_state,last_action]=qtable[last_state,last_action]+0.5*(reward+0.5*qtable[state])
+        qtable[last_state,last_action]=qtable[last_state,last_action]+0.7*(reward+0.9*np.max(qtable[state])-qtable[last_state,last_action])
 
     last_state=state
     last_action=action
@@ -98,8 +107,12 @@ def event_handler(game,event):
         snake_action=9
     game.direction=snake_action
 
-for i in range(0,10):
+if os.path.exists("qtable.txt"):
+    qtable=np.loadtxt("qtable.txt")
+for i in range(0,10000):
     last_state=None
     last_action=None
     score=game.run(event_handler=event_handler)
+    if i%100 == 0:
+        np.savetxt('qtable.txt',qtable)
     print("Game ended with "+str(score))
\ No newline at end of file |
