summaryrefslogtreecommitdiff
path: root/qlearning.py
diff options
context:
space:
mode:
Diffstat (limited to 'qlearning.py')
-rwxr-xr-xqlearning.py45
1 files changed, 29 insertions, 16 deletions
diff --git a/qlearning.py b/qlearning.py
index 492039d..cbb90db 100755
--- a/qlearning.py
+++ b/qlearning.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-import sys,random
+import sys,random,os
import numpy as np
# Import snake game
@@ -31,7 +31,7 @@ qtable=np.zeros((4096, 4))
-game=Snake()
+game=Snake(length=4,fps=200)
def isWall(h,game):
if h[0]<0 or h[1]<0 or h[0] >= game.grid_width or h[1] >= game.grid_height:
@@ -41,8 +41,9 @@ def isWall(h,game):
last_state=None
last_action=None
+attempt=0
def event_handler(game,event):
- global last_state,last_action
+ global last_state,last_action,attempt
h=game.snake[0]
left=(h[0]-1,h[1])
@@ -56,10 +57,10 @@ def event_handler(game,event):
snake_go_down=(game.direction==6)
snake_go_left=(game.direction==9)
- apple_up=(up==a)
- apple_right=(right==a)
- apple_down=(down==a)
- apple_left=(left==a)
+ apple_up=(a[1]<h[1])
+ apple_right=(a[0]>h[0])
+ apple_down=(a[1]>h[1])
+ apple_left=(a[0]<h[0])
obstacle_up=(up in game.snake or isWall(up, game))
obstacle_right=(right in game.snake or isWall(right, game))
@@ -67,24 +68,32 @@ def event_handler(game,event):
obstacle_left=(left in game.snake or isWall(left, game))
reward=0
- if event==1:
- reward=1
- elif event==-1:
+ if event==0:
+ attempt+=1
+ if event==-1:
reward=-10
-
+ attempt=0
+ elif event==1:
+ reward=1
+ attempt=0
+ # Avoid infinite loop
+ if attempt>3000:
+ reward=-1
+ attempt=0
# This comes from me; I do not know if it is the best way to identify a state
state=2**11*snake_go_up+2**10*snake_go_right+2**9*snake_go_down+2**8*snake_go_left+2**7*apple_up+2**6*apple_right+2**5*apple_down+2**4*apple_left+2**3*obstacle_up+2**2*obstacle_right+2**1*obstacle_down+obstacle_left
# Choose an action
action=random.choice((0,1,2,3))
if np.max(qtable[state]) > 0:
- action = np.argmax(qtable[state])
-
- print(np.max(qtable[state]))
+ #qactions=qtable[state]
+ #options=np.flatnonzero(qactions == np.max(qactions)) # Since Q value might be equals for several actions
+ #action = random.choice(options)
+ action=np.argmax(qtable[state])
# Update current state Q
if last_state != None:
- qtable[last_state,last_action]=qtable[last_state,last_action]+0.5*(reward+0.5*qtable[state])
+ qtable[last_state,last_action]=qtable[last_state,last_action]+0.7*(reward+0.9*np.max(qtable[state])-qtable[last_state,last_action])
last_state=state
last_action=action
@@ -98,8 +107,12 @@ def event_handler(game,event):
snake_action=9
game.direction=snake_action
-for i in range(0,10):
+if os.path.exists("qtable.txt"):
+ qtable=np.loadtxt("qtable.txt")
+for i in range(0,10000):
last_state=None
last_action=None
score=game.run(event_handler=event_handler)
+ if i%100 == 0:
+ np.savetxt('qtable.txt',qtable)
print("Game ended with "+str(score)) \ No newline at end of file