diff options
| author | Loic Guegan <manzerbredes@mailbox.org> | 2022-11-01 22:12:14 +0100 |
|---|---|---|
| committer | Loic Guegan <manzerbredes@mailbox.org> | 2022-11-01 22:12:14 +0100 |
| commit | a1469f368b01828342f70e906d436b5849a1d737 (patch) | |
| tree | 8a9f47a5458e96803fcd7ed8d8312dcdd3f1b5a4 /qlearning.py | |
| parent | 3b35b6866d4abbc4eb446ab8a4a06c305305325b (diff) | |
Minor changes
Diffstat (limited to 'qlearning.py')
| -rwxr-xr-x | qlearning.py | 51 |
1 files changed, 40 insertions, 11 deletions
diff --git a/qlearning.py b/qlearning.py index cbb90db..1e5c16f 100755 --- a/qlearning.py +++ b/qlearning.py @@ -21,17 +21,18 @@ from snake import Snake # Obstacle at right? # Obstacle at down? # Obstacle at left? -##### Totally 12 boolean features so 2^12=4096 states +# Queue in front? +##### Totally 13 boolean features so 2^13=8192 states ##### Totally 4 actions for the AI (up, right,down,left) -##### Totally 4*2^12 thus 16 384 table entries +##### Totally 4*2^13 thus 32768 table entries ##### Reward +1 when eat an apple ##### Reward -10 when hit obstacle -qtable=np.zeros((4096, 4)) +qtable=np.zeros((2**13, 4)) -game=Snake(length=4,fps=200) +game=Snake(length=4,fps=200,startat=(10,10)) def isWall(h,game): if h[0]<0 or h[1]<0 or h[0] >= game.grid_width or h[1] >= game.grid_height: @@ -67,6 +68,28 @@ def event_handler(game,event): obstacle_down=(down in game.snake or isWall(down, game)) obstacle_left=(left in game.snake or isWall(left, game)) + queue_in_front=0 + if game.direction == 3: + for x in range(h[0],game.grid_width): + if (x,h[1]) in game.snake[1:]: + queue_in_front=1 + break + elif game.direction == 9: + for x in range(0,h[0]): + if (x,h[1]) in game.snake[1:]: + queue_in_front=1 + break + elif game.direction == 12: + for y in range(0,h[1]): + if (h[0],y) in game.snake[1:]: + queue_in_front=1 + break + elif game.direction == 6: + for y in range(h[1],game.grid_height): + if (h[0],y) in game.snake[1:]: + queue_in_front=1 + break + reward=0 if event==0: attempt+=1 @@ -76,12 +99,9 @@ def event_handler(game,event): elif event==1: reward=1 attempt=0 - # Avoid infinite loop - if attempt>3000: - reward=-1 - attempt=0 + # This come from me I do not now if it is the best way to identify a state - state=2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left + state=2**12*queue_in_front+2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left # Choose an action action=random.choice((0,1,2,3)) @@ -91,6 +111,10 @@ def event_handler(game,event): #action = random.choice(options) action=np.argmax(qtable[state]) + # Avoid infinite loop + if attempt>game.grid_height*game.grid_width: + return(-1) + # Update current state Q if last_state != None: qtable[last_state,last_action]=qtable[last_state,last_action]+0.7*(reward+0.9*np.max(qtable[state])-qtable[last_state,last_action]) @@ -106,13 +130,18 @@ def event_handler(game,event): elif action==3: snake_action=9 game.direction=snake_action + return(0) if os.path.exists("qtable.txt"): qtable=np.loadtxt("qtable.txt") + +perf=0 for i in range(0,10000): last_state=None last_action=None score=game.run(event_handler=event_handler) - if i%100 == 0: + attempt=0 + if i%10 == 0: np.savetxt('qtable.txt',qtable) - print("Game ended with "+str(score))
\ No newline at end of file + perf=max(perf,score) + print("Game ended with "+str(score)+" best so far is "+str(perf))
\ No newline at end of file |
