summaryrefslogtreecommitdiff
path: root/qlearning.py
diff options
context:
space:
mode:
authorLoic Guegan <manzerbredes@mailbox.org>2022-11-01 22:12:14 +0100
committerLoic Guegan <manzerbredes@mailbox.org>2022-11-01 22:12:14 +0100
commita1469f368b01828342f70e906d436b5849a1d737 (patch)
tree8a9f47a5458e96803fcd7ed8d8312dcdd3f1b5a4 /qlearning.py
parent3b35b6866d4abbc4eb446ab8a4a06c305305325b (diff)
Minor changes
Diffstat (limited to 'qlearning.py')
-rwxr-xr-xqlearning.py51
1 files changed, 40 insertions, 11 deletions
diff --git a/qlearning.py b/qlearning.py
index cbb90db..1e5c16f 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -21,17 +21,18 @@ from snake import Snake
# Obstacle at right?
# Obstacle at down?
# Obstacle at left?
-##### Totally 12 boolean features so 2^12=4096 states
+# Queue in front?
+##### Totally 13 boolean features so 2^13=8192 states
##### Totally 4 actions for the AI (up, right,down,left)
-##### Totally 4*2^12 thus 16 384 table entries
+##### Totally 4*2^13 thus 32768 table entries
##### Reward +1 when eat an apple
##### Reward -10 when hit obstacle
-qtable=np.zeros((4096, 4))
+qtable=np.zeros((2**13, 4))
-game=Snake(length=4,fps=200)
+game=Snake(length=4,fps=200,startat=(10,10))
def isWall(h,game):
if h[0]<0 or h[1]<0 or h[0] >= game.grid_width or h[1] >= game.grid_height:
@@ -67,6 +68,28 @@ def event_handler(game,event):
obstacle_down=(down in game.snake or isWall(down, game))
obstacle_left=(left in game.snake or isWall(left, game))
+ queue_in_front=0
+ if game.direction == 3:
+ for x in range(h[0],game.grid_width):
+ if (x,h[1]) in game.snake[1:]:
+ queue_in_front=1
+ break
+ elif game.direction == 9:
+ for x in range(0,h[0]):
+ if (x,h[1]) in game.snake[1:]:
+ queue_in_front=1
+ break
+ elif game.direction == 12:
+ for y in range(0,h[1]):
+ if (h[0],y) in game.snake[1:]:
+ queue_in_front=1
+ break
+ elif game.direction == 6:
+ for y in range(h[1],game.grid_height):
+ if (h[0],y) in game.snake[1:]:
+ queue_in_front=1
+ break
+
reward=0
if event==0:
attempt+=1
@@ -76,12 +99,9 @@ def event_handler(game,event):
elif event==1:
reward=1
attempt=0
- # Avoid infinite loop
- if attempt>3000:
- reward=-1
- attempt=0
+
# This comes from me; I do not know if it is the best way to identify a state
- state=2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
+ state=2**12*queue_in_front+2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
# Choose an action
action=random.choice((0,1,2,3))
@@ -91,6 +111,10 @@ def event_handler(game,event):
#action = random.choice(options)
action=np.argmax(qtable[state])
+ # Avoid infinite loop
+ if attempt>game.grid_height*game.grid_width:
+ return(-1)
+
# Update current state Q
if last_state != None:
qtable[last_state,last_action]=qtable[last_state,last_action]+0.7*(reward+0.9*np.max(qtable[state])-qtable[last_state,last_action])
@@ -106,13 +130,18 @@ def event_handler(game,event):
elif action==3:
snake_action=9
game.direction=snake_action
+ return(0)
if os.path.exists("qtable.txt"):
qtable=np.loadtxt("qtable.txt")
+
+perf=0
for i in range(0,10000):
last_state=None
last_action=None
score=game.run(event_handler=event_handler)
- if i%100 == 0:
+ attempt=0
+ if i%10 == 0:
np.savetxt('qtable.txt',qtable)
- print("Game ended with "+str(score)) \ No newline at end of file
+ perf=max(perf,score)
+ print("Game ended with "+str(score)+" best so far is "+str(perf)) \ No newline at end of file