summaryrefslogtreecommitdiff
path: root/qlearning.py
diff options
context:
space:
mode:
authorLoic Guegan <manzerbredes@mailbox.org>2022-11-01 22:12:14 +0100
committerLoic Guegan <manzerbredes@mailbox.org>2022-11-01 22:12:14 +0100
commita1469f368b01828342f70e906d436b5849a1d737 (patch)
tree8a9f47a5458e96803fcd7ed8d8312dcdd3f1b5a4 /qlearning.py
parent3b35b6866d4abbc4eb446ab8a4a06c305305325b (diff)
Minor changes
Diffstat (limited to 'qlearning.py')
-rwxr-xr-xqlearning.py51
1 files changed, 40 insertions, 11 deletions
diff --git a/qlearning.py b/qlearning.py
index cbb90db..1e5c16f 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -21,17 +21,18 @@ from snake import Snake
# Obstacle at right?
# Obstacle at down?
# Obstacle at left?
-##### Totally 12 boolean features so 2^12=4096 states
+# Queue in front?
+##### Totally 13 boolean features so 2^13=8192 states
##### Totally 4 actions for the AI (up, right,down,left)
-##### Totally 4*2^12 thus 16 384 table entries
+##### Totally 4*2^13 thus 32768 table entries
##### Reward +1 when eat an apple
##### Reward -10 when hit obstacle
-qtable=np.zeros((4096, 4))
+qtable=np.zeros((2**13, 4))
-game=Snake(length=4,fps=200)
+game=Snake(length=4,fps=200,startat=(10,10))
def isWall(h,game):
if h[0]<0 or h[1]<0 or h[0] >= game.grid_width or h[1] >= game.grid_height:
@@ -67,6 +68,28 @@ def event_handler(game,event):
obstacle_down=(down in game.snake or isWall(down, game))
obstacle_left=(left in game.snake or isWall(left, game))
+ queue_in_front=0
+ if game.direction == 3:
+ for x in range(h[0],game.grid_width):
+ if (x,h[1]) in game.snake[1:]:
+ queue_in_front=1
+ break
+ elif game.direction == 9:
+ for x in range(0,h[0]):
+ if (x,h[1]) in game.snake[1:]:
+ queue_in_front=1
+ break
+ elif game.direction == 12:
+ for y in range(0,h[1]):
+ if (h[0],y) in game.snake[1:]:
+ queue_in_front=1
+ break
+ elif game.direction == 6:
+ for y in range(h[1],game.grid_height):
+ if (h[0],y) in game.snake[1:]:
+ queue_in_front=1
+ break
+
reward=0
if event==0:
attempt+=1
@@ -76,12 +99,9 @@ def event_handler(game,event):
elif event==1:
reward=1
attempt=0
- # Avoid infinite loop
- if attempt>3000:
- reward=-1
- attempt=0
+
# This comes from me; I do not know if it is the best way to identify a state
- state=2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
+ state=2**12*queue_in_front+2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
# Choose an action
action=random.choice((0,1,2,3))
@@ -91,6 +111,10 @@ def event_handler(game,event):
#action = random.choice(options)
action=np.argmax(qtable[state])
+ # Avoid infinite loop
+ if attempt>game.grid_height*game.grid_width:
+ return(-1)
+
# Update current state Q
if last_state != None:
qtable[last_state,last_action]=qtable[last_state,last_action]+0.7*(reward+0.9*np.max(qtable[state])-qtable[last_state,last_action])
@@ -106,13 +130,18 @@ def event_handler(game,event):
elif action==3:
snake_action=9
game.direction=snake_action
+ return(0)
if os.path.exists("qtable.txt"):
qtable=np.loadtxt("qtable.txt")
+
+perf=0
for i in range(0,10000):
last_state=None
last_action=None
score=game.run(event_handler=event_handler)
- if i%100 == 0:
+ attempt=0
+ if i%10 == 0:
np.savetxt('qtable.txt',qtable)
- print("Game ended with "+str(score)) \ No newline at end of file
+ perf=max(perf,score)
+ print("Game ended with "+str(score)+" best so far is "+str(perf)) \ No newline at end of file