# AI for Self Driving Car

# Importing the libraries

import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable

# Creating the architecture of the Neural Network

class Network(nn.Module):

    def __init__(self, input_size, nb_action):
        super(Network, self).__init__()
        self.input_size = input_size
        self.nb_action = nb_action
        # Four fully connected layers: input -> 64 -> 32 -> 16 -> one Q-value per action
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, nb_action)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        q_values = self.fc4(x)
        return q_values
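
# Example (sketch, hypothetical sizes): a Network(5, 3) maps a batch of
# 5-dimensional state vectors to 3 Q-values, one per action:
#   net = Network(5, 3)
#   q_values = net(torch.rand(1, 5))   # shape (1, 3)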

# Implementing Experience Replay

class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []

    def push(self, event):
        # An event is a (state, next_state, action, reward) tuple of tensors.
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    def sample(self, batch_size):
        # zip(*...) regroups the sampled events column-wise; each column is then
        # concatenated into one batch tensor and wrapped in a Variable.
        samples = zip(*random.sample(self.memory, batch_size))
        return map(lambda x: Variable(torch.cat(x, 0)), samples)
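
# Example (sketch): pushing one transition and drawing a batch, assuming each
# tensor already carries a leading batch dimension of 1:
#   memory = ReplayMemory(1000)
#   memory.push((state, next_state, torch.LongTensor([action]), torch.Tensor([reward])))
#   states, next_states, actions, rewards = memory.sample(32)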

# Implementing Deep Q Learning

class Dqn():

    def __init__(self, input_size, nb_action, gamma):
        self.gamma = gamma
        self.reward_window = []  # sliding window of recent rewards, used by score()
        self.model = Network(input_size, nb_action)
        self.memory = ReplayMemory(100000)
        self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001)
        self.last_state = torch.Tensor(input_size).unsqueeze(0)
        self.last_action = 0
        self.last_reward = 0

    def select_action(self, state):
        # Softmax over Q-values scaled by a temperature of 100, then sample an action.
        # Variable(..., volatile = True) is the legacy (pre-0.4) PyTorch way of
        # disabling gradient tracking for this forward pass.
        probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100
        action = probs.multinomial(num_samples=1)
        return action.data[0,0]

    def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        # Q(s, a) for the actions that were actually taken
        outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
        # max_a' Q(s', a'), detached so no gradients flow through the target
        next_outputs = self.model(batch_next_state).detach().max(1)[0]
        # TD target: r + gamma * max_a' Q(s', a')
        target = self.gamma*next_outputs + batch_reward
        td_loss = F.smooth_l1_loss(outputs, target)
        self.optimizer.zero_grad()
        td_loss.backward()
        self.optimizer.step()
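
    # Worked example of the TD target above (hypothetical numbers):
    #   gamma = 0.9, reward = -1.0, max_a' Q(s', a') = 2.0
    #   target = -1.0 + 0.9 * 2.0 = 0.8
    #   td_loss = smooth_l1(Q(s, a), 0.8)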

    def update(self, reward, new_signal):
        new_state = torch.Tensor(new_signal).float().unsqueeze(0)
        # Store the transition that just finished: (s, s', a, r)
        self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
        action = self.select_action(new_state)
        if len(self.memory.memory) > 100:
            batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
            self.learn(batch_state, batch_next_state, batch_reward, batch_action)
        self.last_action = action
        self.last_state = new_state
        self.last_reward = reward
        self.reward_window.append(reward)
        if len(self.reward_window) > 1000:
            del self.reward_window[0]
        return action

    def score(self):
        # Mean reward over the sliding window; the +1 avoids division by zero.
        return sum(self.reward_window)/(len(self.reward_window)+1.)

    def save(self):
        torch.save({'state_dict': self.model.state_dict(),
                    'optimizer' : self.optimizer.state_dict(),
                   }, 'last_brain.pth')

    def load(self):
        if os.path.isfile('last_brain.pth'):
            print("=> loading checkpoint... ")
            checkpoint = torch.load('last_brain.pth')
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("done !")
        else:
            print("no checkpoint found...")
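
# Minimal usage sketch (not part of the original module): drives the agent with
# random sensor signals and rewards just to show the expected call pattern.
# The input size (5) and number of actions (3) are hypothetical; a real
# environment would supply genuine signals and rewards.
if __name__ == "__main__":
    brain = Dqn(input_size=5, nb_action=3, gamma=0.9)
    signal = [0.0] * 5
    reward = 0.0
    for _ in range(200):
        action = brain.update(reward, signal)          # chosen action for this step
        signal = list(np.random.rand(5))               # stand-in for new sensor readings
        reward = float(np.random.uniform(-1.0, 1.0))   # stand-in for the environment's reward
    print("Mean reward over the window:", brain.score())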