bayesian optimization added, code ported to pytorch
maurock committed Dec 8, 2020
1 parent 9e8335c commit 55bbb47
Showing 6 changed files with 167 additions and 129 deletions.
2 changes: 0 additions & 2 deletions .gitattributes

This file was deleted.

107 changes: 84 additions & 23 deletions DQN.py
@@ -1,14 +1,18 @@
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
import random
import numpy as np
import pandas as pd
from operator import add
import collections
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import copy
DEVICE = 'cpu' # 'cuda' if torch.cuda.is_available() else 'cpu'

class DQNAgent(object):
class DQNAgent(torch.nn.Module):
def __init__(self, params):
super().__init__()
self.reward = 0
self.gamma = 0.9
self.dataframe = pd.DataFrame()
@@ -24,22 +28,43 @@ def __init__(self, params):
self.memory = collections.deque(maxlen=params['memory_size'])
self.weights = params['weights_path']
self.load_weights = params['load_weights']
self.model = self.network()

self.optimizer = None
self.network()

def network(self):
model = Sequential()
model.add(Dense(output_dim=self.first_layer, activation='relu', input_dim=11))
model.add(Dense(output_dim=self.second_layer, activation='relu'))
model.add(Dense(output_dim=self.third_layer, activation='relu'))
model.add(Dense(output_dim=3, activation='softmax'))
opt = Adam(self.learning_rate)
model.compile(loss='mse', optimizer=opt)

# Layers
self.f1 = nn.Linear(11, self.first_layer)
self.f2 = nn.Linear(self.first_layer, self.second_layer)
self.f3 = nn.Linear(self.second_layer, self.third_layer)
self.f4 = nn.Linear(self.third_layer, 3)
# weights
if self.load_weights:
model.load_weights(self.weights)
return model
self.model = self.load_state_dict(torch.load(self.weights))
print("weights loaded")

def forward(self, x):
x = F.relu(self.f1(x))
x = F.relu(self.f2(x))
x = F.relu(self.f3(x))
x = F.softmax(self.f4(x), dim=-1)
return x

def get_state(self, game, player, food):
"""
Return the state.
The state is a numpy array of 11 values, representing:
- Danger 1 OR 2 steps ahead
- Danger 1 OR 2 steps on the right
- Danger 1 OR 2 steps on the left
- Snake is moving left
- Snake is moving right
- Snake is moving up
- Snake is moving down
- The food is on the left
- The food is on the right
- The food is on the upper side
- The food is on the lower side
"""
state = [
(player.x_change == 20 and player.y_change == 0 and ((list(map(add, player.position[-1], [20, 0])) in player.position) or
player.position[-1][0] + 20 >= (game.game_width - 20))) or (player.x_change == -20 and player.y_change == 0 and ((list(map(add, player.position[-1], [-20, 0])) in player.position) or
@@ -70,7 +95,7 @@ def get_state(self, game, player, food):
food.x_food > player.x, # food right
food.y_food < player.y, # food up
food.y_food > player.y # food down
]
]

for i in range(len(state)):
if state[i]:
@@ -81,6 +106,13 @@ def get_state(self, game, player, food):
return np.asarray(state)

def set_reward(self, player, crash):
"""
Return the reward.
The reward is:
-10 when Snake crashes.
+10 when Snake eats food
0 otherwise
"""
self.reward = 0
if crash:
self.reward = -10
@@ -90,25 +122,54 @@ def set_reward(self, player, crash):
return self.reward

def remember(self, state, action, reward, next_state, done):
"""
Store the <state, action, reward, next_state, is_done> tuple in a
memory buffer for replay memory.
"""
self.memory.append((state, action, reward, next_state, done))

def replay_new(self, memory, batch_size):
"""
Replay memory.
"""
if len(memory) > batch_size:
minibatch = random.sample(memory, batch_size)
else:
minibatch = memory
for state, action, reward, next_state, done in minibatch:
self.train()
torch.set_grad_enabled(True)
target = reward
next_state_tensor = torch.tensor(np.expand_dims(next_state, 0), dtype=torch.float32).to(DEVICE)
state_tensor = torch.tensor(np.expand_dims(state, 0), dtype=torch.float32, requires_grad=True).to(DEVICE)
if not done:
target = reward + self.gamma * np.amax(self.model.predict(np.array([next_state]))[0])
target_f = self.model.predict(np.array([state]))
target = reward + self.gamma * torch.max(self.forward(next_state_tensor)[0])
output = self.forward(state_tensor)
target_f = output.clone()
target_f[0][np.argmax(action)] = target
self.model.fit(np.array([state]), target_f, epochs=1, verbose=0)
target_f.detach()
self.optimizer.zero_grad()
loss = F.mse_loss(output, target_f)
loss.backward()
self.optimizer.step()

def train_short_memory(self, state, action, reward, next_state, done):
"""
Train the DQN agent on the <state, action, reward, next_state, is_done>
tuple at the current timestep.
"""
self.train()
torch.set_grad_enabled(True)
target = reward
next_state_tensor = torch.tensor(next_state.reshape((1, 11)), dtype=torch.float32).to(DEVICE)
state_tensor = torch.tensor(state.reshape((1, 11)), dtype=torch.float32, requires_grad=True).to(DEVICE)
if not done:
target = reward + self.gamma * np.amax(self.model.predict(next_state.reshape((1, 11)))[0])
target_f = self.model.predict(state.reshape((1, 11)))
target = reward + self.gamma * torch.max(self.forward(next_state_tensor[0]))
output = self.forward(state_tensor)
target_f = output.clone()
target_f[0][np.argmax(action)] = target
self.model.fit(state.reshape((1, 11)), target_f, epochs=1, verbose=0)
target_f.detach()
self.optimizer.zero_grad()
loss = F.mse_loss(output, target_f)
loss.backward()
self.optimizer.step()
28 changes: 22 additions & 6 deletions README.md
@@ -1,18 +1,25 @@
# Deep Reinforcement Learning
## Project: Train AI to play Snake
*UPDATE:*

This project has been recently updated:
- The code of Deep Reinforcement Learning was ported from Keras/TF to Pytorch. To see the original version of the code in Keras/TF, please refer to this repository: [snake-ga-tf](https://github.com/maurock/snake-ga-tf).
- I added Bayesian Optimization to optimize some parameters of Deep RL.

## Introduction
The goal of this project is to develop an AI Bot able to learn how to play the popular game Snake from scratch. In order to do it, I implemented a Deep Reinforcement Learning algorithm. This approach consists in giving the system parameters related to its state, and a positive or negative reward based on its actions. No rules about the game are given, and initially the Bot has no information on what it needs to do. The goal for the system is to figure it out and elaborate a strategy to maximize the score - or the reward.
We are going to see how a Deep Q-Learning algorithm learns how to play snake, scoring up to 50 points and showing a solid strategy after only 5 minutes of training.
The goal of this project is to develop an AI Bot able to learn how to play the popular game Snake from scratch. To do so, I implemented a Deep Reinforcement Learning algorithm. This approach consists of giving the system parameters describing its state, and a positive or negative reward based on its actions. No rules about the game are given, and initially the Bot has no information on what it needs to do. The goal for the system is to figure this out and develop a strategy to maximize the score - or the reward. \
We are going to see how a Deep Q-Learning algorithm learns how to play Snake, scoring up to 50 points and showing a solid strategy after only 5 minutes of training. \
Additionally, it is possible to run the Bayesian Optimization method to find the optimal parameters of the Deep neural network, as well as some parameters of the Deep RL approach.

## Install
This project requires Python 3.6 with the pygame library installed, as well as Keras with Tensorflow backend.
This project requires Python 3.6 with the pygame library installed, as well as PyTorch. \
The full list of requirements is in `requirements.txt`.
```bash
git clone [email protected]:maurock/snake-ga.git
```
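
The dependencies can then be installed, for example, with:

```bash
cd snake-ga
pip install -r requirements.txt
```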

## Run
To run the game, executes in the snake-ga folder:
To run and display the game, execute the following in the snake-ga folder:

```python
python snakeClass.py --display=True --speed=50
@@ -22,15 +29,24 @@ Arguments description:
- --display - Type bool, default True, display or not game view
- --speed - Type integer, default 50, game speed

This will run and show the agent. The default configuration loads the file *weights/weights.hdf5* and runs a test.
The Deep neural network can be customized in the file snakeClass.py modifying the dictionary *params* in the function *define_parameters()*
The default configuration loads the file *weights/weights.hdf5* and runs a test.
The parameters of the Deep neural network can be changed in *snakeClass.py* by modifying the dictionary `params` in the function `define_parameters()`.

To train the agent, set the following in *snakeClass.py*:
- params['load_weights'] = False
- params['train'] = True

In *snakeClass.py* you can set the arguments *--display*=False and *--speed*=0 if you do not want to see the game running. This speeds up the training phase.
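
As a reference, here is a hypothetical sketch of such a configuration (the key names come from `DQN.py` and this README; the values are placeholders, and keys used elsewhere in the code are omitted):

```python
def define_parameters():
    params = {
        # Network and training (placeholder values, not the repository defaults)
        'learning_rate': 0.001,
        'first_layer': 200,
        'second_layer': 20,
        'third_layer': 50,
        # Replay memory and weights handling
        'memory_size': 2500,
        'weights_path': 'weights/weights.hdf5',
        'load_weights': False,   # False: train from scratch
        'train': True,           # True: run a training session
    }
    return params
```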

## Optimize Deep RL with Bayesian Optimization
To optimize the Deep neural network and additional parameters, run:

```python
python snakeClass.py --bayesianopt=True
```

This method uses Bayesian Optimization to optimize some parameters of Deep RL. The parameters and their search space can be modified in *bayesOpt.py* by editing the `optim_params` dictionary in `optimize_RL`.
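
For reference, a minimal GPyOpt-style sketch of this kind of search follows (the parameter names, bounds, and the `run_training` helper are illustrative only; the actual `optim_params` in *bayesOpt.py* may be structured differently):

```python
from GPyOpt.methods import BayesianOptimization

# Illustrative search space; names, types and bounds are assumptions.
search_space = [
    {'name': 'learning_rate', 'type': 'continuous', 'domain': (0.00005, 0.001)},
    {'name': 'first_layer',   'type': 'discrete',   'domain': tuple(range(20, 201, 20))},
    {'name': 'second_layer',  'type': 'discrete',   'domain': tuple(range(20, 201, 20))},
]

def objective(x):
    # GPyOpt passes a 2D array of candidate parameters, one row per evaluation.
    learning_rate, first_layer, second_layer = x[0]
    # run_training is a hypothetical helper that trains the agent with these
    # parameters and returns its mean score; GPyOpt minimizes, hence the minus sign.
    return -run_training(learning_rate, int(first_layer), int(second_layer))

optimizer = BayesianOptimization(f=objective, domain=search_space)
optimizer.run_optimization(max_iter=20)
print('best parameters:', optimizer.x_opt, 'best value:', optimizer.fx_opt)
```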

## For Mac users
It seems there is an OSX-specific problem, since many users cannot see the game running.
To fix this problem, add this line in update_screen().
70 changes: 7 additions & 63 deletions requirements.txt
@@ -1,65 +1,9 @@
absl-py==0.8.0
astor==0.8.0
blinker==1.4
brotlipy==0.7.0
cachetools==4.1.0
certifi==2020.4.5.2
cffi==1.14.0
chardet==3.0.4
click==7.1.2
cmake-example==0.0.1
cryptography==2.9.2
cycler==0.10.0
gast==0.2.2
google-auth==1.14.1
google-auth-oauthlib==0.4.1
google-pasta==0.1.7
grpcio==1.27.2
h5py==2.10.0
idna==2.9
Keras==2.3.1
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.0
kiwisolver==1.2.0
Markdown==3.1.1
matplotlib==3.2.0
mkl-fft==1.1.0
mkl-random==1.1.1
mkl-service==2.3.0
msgpack-numpy==0.4.4.3
numpy==1.18.1
oauthlib==3.1.0
opt-einsum==3.1.0
Keras==2.2.4
numpy==1.17.2
torch==1.4.0
seaborn==0.9.0
pygame==1.9.3
pandas==0.25.1
protobuf==3.12.3
pyasn1==0.4.8
pyasn1-modules==0.2.7
pycparser==2.20
pygame==1.9.6
PyJWT==1.7.1
pyOpenSSL==19.1.0
pyparsing==2.4.7
pyreadline==2.1
PySocks==1.7.1
python-dateutil==2.8.1
pytz==2020.1
PyYAML==5.3.1
requests==2.23.0
requests-oauthlib==1.3.0
rsa==4.0
scipy==1.4.1
seaborn==0.10.1
six==1.15.0
tabulate==0.8.3
tensorboard==2.2.1
tensorboard-plugin-wit==1.6.0
tensorflow==2.1.0
tensorflow-estimator==1.14.0
tensorpack==0.9.4
termcolor==1.1.0
tgan==0.1.0
urllib3==1.25.9
Werkzeug==0.16.1
win-inet-pton==1.1.0
wincertstore==0.2
wrapt==1.11.2
GPyOpt==1.2.6
numpy==1.19.4