qlearning.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

#!/usr/bin/env python
import sys,random,os
import numpy as np

# Import snake game
from snake import Snake


# Setup QTable
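# Boolean features:
# Is the snake going up?
# Is the snake going right?
# Is the snake going down?
# Is the snake going left?
# Is the apple above the head?
# Is the apple to the right of the head?
# Is the apple below the head?
# Is the apple to the left of the head?
# Is there an obstacle directly above the head?
# Is there an obstacle directly to the right of the head?
# Is there an obstacle directly below the head?
# Is there an obstacle directly to the left of the head?
# Is part of the snake's body straight ahead of the head?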
##### 13 boolean features in total, so 2^13=8192 states
##### 4 actions in total for the AI (up, right, down, left)
##### 4*2^13 = 32768 table entries in total
##### Reward +5 when the snake eats an apple
##### Reward -10 when the snake hits an obstacle

# Q-table: one row per encoded state (2**13 rows) and one column per action (4).
# All values start at zero.
qtable=np.zeros((2**13, 4))


# Single game instance shared by event_handler and the training loop below.
game=Snake(length=1,fps=200,startat=(10,10))

def isWall(h,game):
    """Tell whether cell *h* falls outside the grid of *game*.

    *h* is an (x, y) pair; the grid spans 0..grid_width-1 horizontally and
    0..grid_height-1 vertically. Returns True when the cell is off the grid.
    """
    col, row = h[0], h[1]
    inside_grid = 0 <= col < game.grid_width and 0 <= row < game.grid_height
    return not inside_grid


# Learning state carried from one event_handler call to the next.
last_state=None   # encoded state from the previous call; None until a first step is taken
last_action=None  # action index chosen on the previous call
attempt=0         # consecutive calls with event==0; reset when event is -1 or 1
# Q-learning hyper-parameters used in the update rule of event_handler.
_LEARNING_RATE = 0.7
_DISCOUNT = 0.9

# Direction code written to game.direction for each action index
# (12=up, 3=right, 6=down, 9=left).
_DIRECTION_FOR_ACTION = {0: 12, 1: 3, 2: 6, 3: 9}


def _tail_in_front(game):
    """Return 1 if a body segment lies on the head's line of travel, else 0.

    Scans the head's row (when moving left/right) or column (when moving
    up/down) on the side the snake is heading towards and looks for any
    segment of game.snake other than the first one (the head).
    """
    h = game.snake[0]
    if game.direction == 3:
        cells = [(x, h[1]) for x in range(h[0], game.grid_width)]
    elif game.direction == 9:
        cells = [(x, h[1]) for x in range(0, h[0])]
    elif game.direction == 12:
        cells = [(h[0], y) for y in range(0, h[1])]
    elif game.direction == 6:
        cells = [(h[0], y) for y in range(h[1], game.grid_height)]
    else:
        return 0
    # Slice the body once rather than once per scanned cell.
    tail = game.snake[1:]
    return int(any(cell in tail for cell in cells))


def _encode_state(flags):
    """Pack an iterable of booleans into an int, first flag most significant."""
    state = 0
    for flag in flags:
        state = (state << 1) | int(flag)
    return state


def event_handler(game,event):
    """Observe the game, learn from the previous step and choose the next move.

    Builds the 13-bit state from the current game situation, applies the
    Q-learning update for the transition (last_state, last_action) -> state,
    then writes the chosen direction into game.direction.

    Parameters:
        game: the Snake instance; this function reads snake, apple, direction,
            grid_width and grid_height, and writes direction.
        event: -1 gives a reward of -10, 1 gives a reward of +5, and 0 counts
            as a step without progress.

    Returns:
        -1 once more consecutive event==0 steps than there are grid cells have
        elapsed (no learning and no move happen on that call), otherwise 0.
        How the caller interprets these values is defined by Snake.run, which
        is not visible here.
    """
    global last_state,last_action,attempt

    h = game.snake[0]
    left = (h[0] - 1, h[1])
    right = (h[0] + 1, h[1])
    up = (h[0], h[1] - 1)
    down = (h[0], h[1] + 1)
    a = game.apple

    def blocked(cell):
        # A neighbouring cell is an obstacle if the snake occupies it or it is off-grid.
        return cell in game.snake or isWall(cell, game)

    # Bit order (most significant first) defines the row index in qtable; it
    # must stay stable for previously saved tables to remain meaningful.
    state = _encode_state((
        _tail_in_front(game),
        game.direction == 12,   # snake going up
        game.direction == 3,    # snake going right
        game.direction == 6,    # snake going down
        game.direction == 9,    # snake going left
        a[1] < h[1],            # apple above
        a[0] > h[0],            # apple to the right
        a[1] > h[1],            # apple below
        a[0] < h[0],            # apple to the left
        blocked(up),
        blocked(right),
        blocked(down),
        blocked(left),
    ))

    reward = 0
    if event == 0:
        attempt += 1
    if event == -1:
        reward = -10
        attempt = 0
    elif event == 1:
        reward = 5
        attempt = 0

    # Explore at random until some action in this state has a positive value,
    # then act greedily (np.argmax picks the lowest index on ties).
    action = random.choice((0, 1, 2, 3))
    if np.max(qtable[state]) > 0:
        action = np.argmax(qtable[state])

    # Avoid infinite loops: bail out after too many steps without any reward event.
    if attempt > game.grid_height * game.grid_width:
        return -1

    # Q-learning update for the transition that led to the current state.
    # NOTE(review): when event == -1 this still bootstraps from the
    # post-collision state; textbook Q-learning uses the bare reward for a
    # terminal transition. Confirm whether this is intended.
    if last_state is not None:
        previous = qtable[last_state, last_action]
        qtable[last_state, last_action] = previous + _LEARNING_RATE * (
            reward + _DISCOUNT * np.max(qtable[state]) - previous
        )
    last_state = state
    last_action = action

    # Apply the action; any unexpected index falls back to "up" (12).
    game.direction = _DIRECTION_FOR_ACTION.get(action, 12)
    return 0

# Resume from a previously saved Q-table when one exists.
if os.path.exists("qtable.txt"):
    qtable=np.loadtxt("qtable.txt")

perf=0  # best score seen during this run
for i in range(10000):
    # Each game starts with no previous transition and a fresh no-progress counter.
    last_state=None
    last_action=None
    attempt=0
    score=game.run(event_handler=event_handler)
    # Periodic checkpoint so that an interrupted run keeps most of its learning.
    if i%10 == 0:
        np.savetxt('qtable.txt',qtable)
    perf=max(perf,score)
    print("Game ended with "+str(score)+"  best so far is "+str(perf))

# The last periodic checkpoint is taken at i == 9990; save once more so the
# updates from the final games are not lost.
np.savetxt('qtable.txt',qtable)