path: root/qlearning.py
blob: cbb90dba24ffa8216e067dabc6116c0e34b218bf
#!/usr/bin/env python
import os
import random

import numpy as np

# Import snake game
from snake import Snake



# Set up the Q-table
# Boolean features:
# Is the snake going up?
# Is the snake going right?
# Is the snake going down?
# Is the snake going left?
# Is the apple above?
# Is the apple to the right?
# Is the apple below?
# Is the apple to the left?
# Is there an obstacle above?
# Is there an obstacle to the right?
# Is there an obstacle below?
# Is there an obstacle to the left?
##### In total: 12 boolean features, so 2^12 = 4096 states
##### 4 possible actions for the AI (up, right, down, left)
##### Hence 4 * 2^12 = 16,384 table entries
##### Reward +1 when the snake eats an apple
##### Reward -10 when it hits an obstacle
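# Worked example of the state index built in event_handler below: a snake
# heading right (2^10) with the apple below it (2^5) and a wall directly
# above (2^3) maps to state 1024 + 32 + 8 = 1064.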

qtable = np.zeros((4096, 4))  # one row per state, one column per action



game = Snake(length=4, fps=200)

def isWall(h, game):
    # True when the position h lies outside the playing grid
    return h[0] < 0 or h[1] < 0 or h[0] >= game.grid_width or h[1] >= game.grid_height
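# e.g. isWall((-1, 5), game) is always True (x < 0), while isWall((0, 5), game)
# is False on any grid taller than 5 cells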


last_state = None
last_action = None
attempt = 0
def event_handler(game, event):
    global last_state, last_action, attempt

    # Head position, its four neighbouring cells, and the apple position
    h = game.snake[0]
    left = (h[0] - 1, h[1])
    right = (h[0] + 1, h[1])
    up = (h[0], h[1] - 1)
    down = (h[0], h[1] + 1)
    a = game.apple

    # game.direction encodes headings as clock positions: 12=up, 3=right, 6=down, 9=left
    snake_go_up = (game.direction == 12)
    snake_go_right = (game.direction == 3)
    snake_go_down = (game.direction == 6)
    snake_go_left = (game.direction == 9)

    apple_up = (a[1] < h[1])
    apple_right = (a[0] > h[0])
    apple_down = (a[1] > h[1])
    apple_left = (a[0] < h[0])

    obstacle_up = (up in game.snake or isWall(up, game))
    obstacle_right = (right in game.snake or isWall(right, game))
    obstacle_down = (down in game.snake or isWall(down, game))
    obstacle_left = (left in game.snake or isWall(left, game))

    # event: 0 = ordinary step, -1 = obstacle hit, 1 = apple eaten
    reward = 0
    if event == 0:
        attempt += 1
    if event == -1:
        reward = -10
        attempt = 0
    elif event == 1:
        reward = 1
        attempt = 0
    # Punish endless wandering to avoid an infinite loop
    if attempt > 3000:
        reward = -1
        attempt = 0
    # This encoding is my own invention; I do not know if it is the best way to identify a state
    state = (2**11 * snake_go_up + 2**10 * snake_go_right
             + 2**9 * snake_go_down + 2**8 * snake_go_left
             + 2**7 * apple_up + 2**6 * apple_right
             + 2**5 * apple_down + 2**4 * apple_left
             + 2**3 * obstacle_up + 2**2 * obstacle_right
             + 2**1 * obstacle_down + obstacle_left)

    # Choose an action: greedy on the Q-values once this state has a positive
    # entry, otherwise random (crude exploration). np.argmax picks the first
    # action when several share the maximum Q-value; a random tie-break among
    # the maxima would also work.
    action = random.choice((0, 1, 2, 3))
    if np.max(qtable[state]) > 0:
        action = np.argmax(qtable[state])
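    # For comparison, a minimal epsilon-greedy sketch (not used here; `epsilon`
    # is an assumed constant such as 0.1):
    #   if random.random() < epsilon:
    #       action = random.choice((0, 1, 2, 3))
    #   else:
    #       action = np.argmax(qtable[state])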

    # Q-learning update for the previous state/action pair:
    #   Q(s,a) += alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
    # with learning rate alpha = 0.7 and discount factor gamma = 0.9
    if last_state is not None:
        qtable[last_state, last_action] += 0.7 * (
            reward + 0.9 * np.max(qtable[state]) - qtable[last_state, last_action])
    last_state = state
    last_action = action

    # Apply the action, translating it to the game's clock-position encoding
    snake_action = 12
    if action == 1:
        snake_action = 3
    elif action == 2:
        snake_action = 6
    elif action == 3:
        snake_action = 9
    game.direction = snake_action

# Resume training from a previously saved Q-table, if one exists
if os.path.exists("qtable.txt"):
    qtable = np.loadtxt("qtable.txt")
for i in range(10000):
    last_state = None
    last_action = None
    score = game.run(event_handler=event_handler)
    # Checkpoint the Q-table every 100 games
    if i % 100 == 0:
        np.savetxt('qtable.txt', qtable)
    print("Game ended with " + str(score))