Import

commit 5870182dad

Intelligence Artificielle de A a Z/1. Self_Driving_Car/ai.py (new file, 107 lines)
@@ -0,0 +1,107 @@
# AI for Self Driving Car

# Importing the libraries

import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable

# Creating the architecture of the Neural Network

class Network(nn.Module):

    def __init__(self, input_size, nb_action):
        super(Network, self).__init__()
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 30)
        self.fc2 = nn.Linear(30, nb_action)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        q_values = self.fc2(x)
        return q_values

# Implementing Experience Replay

class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []

    def push(self, event):
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    def sample(self, batch_size):
        samples = zip(*random.sample(self.memory, batch_size))
        return map(lambda x: Variable(torch.cat(x, 0)), samples)

# Implementing Deep Q Learning

class Dqn():

    def __init__(self, input_size, nb_action, gamma):
        self.gamma = gamma
        self.reward_window = []
        self.model = Network(input_size, nb_action)
        self.memory = ReplayMemory(100000)
        self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001)
        self.last_state = torch.Tensor(input_size).unsqueeze(0)
        self.last_action = 0
        self.last_reward = 0

    def select_action(self, state):
        probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100
        action = probs.multinomial(num_samples=1)
        return action.data[0,0]

    def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
        next_outputs = self.model(batch_next_state).detach().max(1)[0]
        target = self.gamma*next_outputs + batch_reward
        td_loss = F.smooth_l1_loss(outputs, target)
        self.optimizer.zero_grad()
        td_loss.backward()
        self.optimizer.step()

    def update(self, reward, new_signal):
        new_state = torch.Tensor(new_signal).float().unsqueeze(0)
        self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
        action = self.select_action(new_state)
        if len(self.memory.memory) > 100:
            batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
            self.learn(batch_state, batch_next_state, batch_reward, batch_action)
        self.last_action = action
        self.last_state = new_state
        self.last_reward = reward
        self.reward_window.append(reward)
        if len(self.reward_window) > 1000:
            del self.reward_window[0]
        return action

    def score(self):
        return sum(self.reward_window)/(len(self.reward_window)+1.)

    def save(self):
        torch.save({'state_dict': self.model.state_dict(),
                    'optimizer' : self.optimizer.state_dict(),
                   }, 'last_brain.pth')

    def load(self):
        if os.path.isfile('last_brain.pth'):
            print("=> loading checkpoint... ")
            checkpoint = torch.load('last_brain.pth')
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("done !")
        else:
            print("no checkpoint found...")
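Note: ai.py is written against a pre-0.4 PyTorch API (Variable(..., volatile=True), F.softmax without a dim argument). A minimal sketch of the same temperature-scaled action selection on a current PyTorch, assuming the same Network model, could look like this:

import torch
import torch.nn.functional as F

def select_action(model, state, temperature=100.0):
    with torch.no_grad():                          # replaces the deprecated volatile=True flag
        q_values = model(state)                    # shape (1, nb_action)
        probs = F.softmax(q_values * temperature, dim=1)
    return probs.multinomial(num_samples=1).item() # sampled action index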
Intelligence Artificielle de A a Z/1. Self_Driving_Car/ai2.py (new file, 111 lines)
@@ -0,0 +1,111 @@
# AI for Self Driving Car

# Importing the libraries

import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable

# Creating the architecture of the Neural Network

class Network(nn.Module):

    def __init__(self, input_size, nb_action):
        super(Network, self).__init__()
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, nb_action)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        q_values = self.fc4(x)
        return q_values

# Implementing Experience Replay

class ReplayMemory(object):

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []

    def push(self, event):
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    def sample(self, batch_size):
        samples = zip(*random.sample(self.memory, batch_size))
        return map(lambda x: Variable(torch.cat(x, 0)), samples)

# Implementing Deep Q Learning

class Dqn():

    def __init__(self, input_size, nb_action, gamma):
        self.gamma = gamma
        self.reward_window = []
        self.model = Network(input_size, nb_action)
        self.memory = ReplayMemory(100000)
        self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001)
        self.last_state = torch.Tensor(input_size).unsqueeze(0)
        self.last_action = 0
        self.last_reward = 0

    def select_action(self, state):
        probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100
        action = probs.multinomial(num_samples=1)
        return action.data[0,0]

    def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
        next_outputs = self.model(batch_next_state).detach().max(1)[0]
        target = self.gamma*next_outputs + batch_reward
        td_loss = F.smooth_l1_loss(outputs, target)
        self.optimizer.zero_grad()
        td_loss.backward()
        self.optimizer.step()

    def update(self, reward, new_signal):
        new_state = torch.Tensor(new_signal).float().unsqueeze(0)
        self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
        action = self.select_action(new_state)
        if len(self.memory.memory) > 100:
            batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
            self.learn(batch_state, batch_next_state, batch_reward, batch_action)
        self.last_action = action
        self.last_state = new_state
        self.last_reward = reward
        self.reward_window.append(reward)
        if len(self.reward_window) > 1000:
            del self.reward_window[0]
        return action

    def score(self):
        return sum(self.reward_window)/(len(self.reward_window)+1.)

    def save(self):
        torch.save({'state_dict': self.model.state_dict(),
                    'optimizer' : self.optimizer.state_dict(),
                   }, 'last_brain.pth')

    def load(self):
        if os.path.isfile('last_brain.pth'):
            print("=> loading checkpoint... ")
            checkpoint = torch.load('last_brain.pth')
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("done !")
        else:
            print("no checkpoint found...")
@@ -0,0 +1,59 @@
#:kivy 1.0.9
# ref: https://kivy.org/docs/tutorials/pong.html

<Car>:
    size: 20, 10
    canvas:
        PushMatrix
        Rotate:
            angle: self.angle
            origin: self.center
        Rectangle:
            pos: self.pos
            size: self.size
        PopMatrix

<Ball1>:
    size: 10,10
    canvas:
        Color:
            rgba: 1,0,0,1
        Ellipse:
            pos: self.pos
            size: self.size

<Ball2>:
    size: 10,10
    canvas:
        Color:
            rgba: 0,1,1,1
        Ellipse:
            pos: self.pos
            size: self.size

<Ball3>:
    size: 10,10
    canvas:
        Color:
            rgba: 1,1,0,1
        Ellipse:
            pos: self.pos
            size: self.size

<Game>:
    car: game_car
    ball1: game_ball1
    ball2: game_ball2
    ball3: game_ball3

    Car:
        id: game_car
        center: self.parent.center
    Ball1:
        id: game_ball1
        center: self.parent.center
    Ball2:
        id: game_ball2
        center: self.parent.center
    Ball3:
        id: game_ball3
        center: self.parent.center
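These rules are picked up automatically by Kivy: because the App subclass in map.py is named CarApp, Kivy looks for a rule file named car.kv next to it (the class name lowercased, minus the App suffix). If the file were named differently, it could be loaded explicitly, as a hedged alternative:

from kivy.lang import Builder
Builder.load_file('car.kv')   # explicit alternative to the CarApp -> car.kv naming convention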
Intelligence Artificielle de A a Z/1. Self_Driving_Car/map.py (new file, 235 lines)
@@ -0,0 +1,235 @@
# Self Driving Car

# Importing the libraries
import numpy as np
from random import random, randint
import matplotlib.pyplot as plt
import time

# Importing the Kivy packages
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.button import Button
from kivy.graphics import Color, Ellipse, Line
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.clock import Clock

# Importing the Dqn object from our AI in ai.py
from ai import Dqn

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
brain = Dqn(5,3,0.9)
action2rotation = [0,20,-20]
last_reward = 0
scores = []

# Initializing the map
first_update = True
def init():
    global sand
    global goal_x
    global goal_y
    global first_update
    sand = np.zeros((longueur,largeur))
    goal_x = 20
    goal_y = largeur - 20
    first_update = False

# Initializing the last distance
last_distance = 0

# Creating the car class

class Car(Widget):

    angle = NumericProperty(0)
    rotation = NumericProperty(0)
    velocity_x = NumericProperty(0)
    velocity_y = NumericProperty(0)
    velocity = ReferenceListProperty(velocity_x, velocity_y)
    sensor1_x = NumericProperty(0)
    sensor1_y = NumericProperty(0)
    sensor1 = ReferenceListProperty(sensor1_x, sensor1_y)
    sensor2_x = NumericProperty(0)
    sensor2_y = NumericProperty(0)
    sensor2 = ReferenceListProperty(sensor2_x, sensor2_y)
    sensor3_x = NumericProperty(0)
    sensor3_y = NumericProperty(0)
    sensor3 = ReferenceListProperty(sensor3_x, sensor3_y)
    signal1 = NumericProperty(0)
    signal2 = NumericProperty(0)
    signal3 = NumericProperty(0)

    def move(self, rotation):
        self.pos = Vector(*self.velocity) + self.pos
        self.rotation = rotation
        self.angle = self.angle + self.rotation
        self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos
        self.sensor2 = Vector(30, 0).rotate((self.angle+30)%360) + self.pos
        self.sensor3 = Vector(30, 0).rotate((self.angle-30)%360) + self.pos
        self.signal1 = int(np.sum(sand[int(self.sensor1_x)-10:int(self.sensor1_x)+10, int(self.sensor1_y)-10:int(self.sensor1_y)+10]))/400.
        self.signal2 = int(np.sum(sand[int(self.sensor2_x)-10:int(self.sensor2_x)+10, int(self.sensor2_y)-10:int(self.sensor2_y)+10]))/400.
        self.signal3 = int(np.sum(sand[int(self.sensor3_x)-10:int(self.sensor3_x)+10, int(self.sensor3_y)-10:int(self.sensor3_y)+10]))/400.
        if self.sensor1_x>longueur-10 or self.sensor1_x<10 or self.sensor1_y>largeur-10 or self.sensor1_y<10:
            self.signal1 = 1.
        if self.sensor2_x>longueur-10 or self.sensor2_x<10 or self.sensor2_y>largeur-10 or self.sensor2_y<10:
            self.signal2 = 1.
        if self.sensor3_x>longueur-10 or self.sensor3_x<10 or self.sensor3_y>largeur-10 or self.sensor3_y<10:
            self.signal3 = 1.

class Ball1(Widget):
    pass
class Ball2(Widget):
    pass
class Ball3(Widget):
    pass

# Creating the game class

class Game(Widget):

    car = ObjectProperty(None)
    ball1 = ObjectProperty(None)
    ball2 = ObjectProperty(None)
    ball3 = ObjectProperty(None)

    def serve_car(self):
        self.car.center = self.center
        self.car.velocity = Vector(6, 0)

    def update(self, dt):

        global brain
        global last_reward
        global scores
        global last_distance
        global goal_x
        global goal_y
        global longueur
        global largeur

        longueur = self.width
        largeur = self.height
        if first_update:
            init()

        xx = goal_x - self.car.x
        yy = goal_y - self.car.y
        orientation = Vector(*self.car.velocity).angle((xx,yy))/180.
        last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation, -orientation]
        action = brain.update(last_reward, last_signal)
        scores.append(brain.score())
        rotation = action2rotation[action]
        self.car.move(rotation)
        distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)
        self.ball1.pos = self.car.sensor1
        self.ball2.pos = self.car.sensor2
        self.ball3.pos = self.car.sensor3

        if sand[int(self.car.x),int(self.car.y)] > 0:
            self.car.velocity = Vector(1, 0).rotate(self.car.angle)
            last_reward = -1
        else: # otherwise
            self.car.velocity = Vector(6, 0).rotate(self.car.angle)
            last_reward = -0.2
            if distance < last_distance:
                last_reward = 0.1

        if self.car.x < 10:
            self.car.x = 10
            last_reward = -1
        if self.car.x > self.width - 10:
            self.car.x = self.width - 10
            last_reward = -1
        if self.car.y < 10:
            self.car.y = 10
            last_reward = -1
        if self.car.y > self.height - 10:
            self.car.y = self.height - 10
            last_reward = -1

        if distance < 100:
            goal_x = self.width-goal_x
            goal_y = self.height-goal_y
        last_distance = distance

# Adding the painting tools

class MyPaintWidget(Widget):

    def on_touch_down(self, touch):
        global length, n_points, last_x, last_y
        with self.canvas:
            Color(0.8,0.7,0)
            d = 10.
            touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10)
            last_x = int(touch.x)
            last_y = int(touch.y)
            n_points = 0
            length = 0
            sand[int(touch.x),int(touch.y)] = 1

    def on_touch_move(self, touch):
        global length, n_points, last_x, last_y
        if touch.button == 'left':
            touch.ud['line'].points += [touch.x, touch.y]
            x = int(touch.x)
            y = int(touch.y)
            length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2))
            n_points += 1.
            density = n_points/(length)
            touch.ud['line'].width = int(20 * density + 1)
            sand[int(touch.x) - 10 : int(touch.x) + 10, int(touch.y) - 10 : int(touch.y) + 10] = 1
            last_x = x
            last_y = y

# Adding the API Buttons (clear, save and load)

class CarApp(App):

    def build(self):
        parent = Game()
        parent.serve_car()
        Clock.schedule_interval(parent.update, 1.0/60.0)
        self.painter = MyPaintWidget()
        clearbtn = Button(text = 'clear')
        savebtn = Button(text = 'save', pos = (parent.width, 0))
        loadbtn = Button(text = 'load', pos = (2 * parent.width, 0))
        clearbtn.bind(on_release = self.clear_canvas)
        savebtn.bind(on_release = self.save)
        loadbtn.bind(on_release = self.load)
        parent.add_widget(self.painter)
        parent.add_widget(clearbtn)
        parent.add_widget(savebtn)
        parent.add_widget(loadbtn)
        return parent

    def clear_canvas(self, obj):
        global sand
        self.painter.canvas.clear()
        sand = np.zeros((longueur,largeur))

    def save(self, obj):
        print("saving brain...")
        brain.save()
        plt.plot(scores)
        plt.show()

    def load(self, obj):
        print("loading last saved brain...")
        brain.load()

# Running the whole thing
if __name__ == '__main__':
    CarApp().run()
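The state passed to the brain is [signal1, signal2, signal3, orientation, -orientation], where orientation is the signed angle between the car's velocity and the vector pointing at the goal, divided by 180 so it lies in [-1, 1]. A standalone numpy sketch of a comparable computation (the sign convention may differ from Kivy's Vector.angle) would be:

import numpy as np

def orientation_to_goal(velocity, to_goal):
    angle_v = np.degrees(np.arctan2(velocity[1], velocity[0]))
    angle_g = np.degrees(np.arctan2(to_goal[1], to_goal[0]))
    diff = (angle_v - angle_g + 180.0) % 360.0 - 180.0   # wrap into [-180, 180)
    return diff / 180.0

print(orientation_to_goal((6, 0), (100, 0)))   # car heading straight at the goal -> 0.0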
Intelligence Artificielle de A a Z/1. Self_Driving_Car/map2.py (new file, 249 lines)
@@ -0,0 +1,249 @@
# Self Driving Car

# Importing the libraries
import numpy as np
from random import random, randint
import matplotlib.pyplot as plt
import time

# Importing the Kivy packages
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.button import Button
from kivy.graphics import Color, Ellipse, Line
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.clock import Clock

# Importing the Dqn object from our AI in ai2.py
from ai2 import Dqn

# Implementing a timer
from timeit import default_timer
starting_time = default_timer()
duration_time = default_timer() - starting_time

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
brain = Dqn(6,3,0.9)
action2rotation = [0,20,-20]
last_reward = 0
scores = []

# Initializing the map
first_update = True
def init():
    global sand
    global goal_x
    global goal_y
    global first_update
    sand = np.zeros((longueur,largeur))
    goal_x = 20
    goal_y = largeur - 20
    first_update = False

# Initializing the last distance
last_distance = 0

# Creating the car class

class Car(Widget):

    angle = NumericProperty(0)
    rotation = NumericProperty(0)
    velocity_x = NumericProperty(0)
    velocity_y = NumericProperty(0)
    velocity = ReferenceListProperty(velocity_x, velocity_y)
    sensor1_x = NumericProperty(0)
    sensor1_y = NumericProperty(0)
    sensor1 = ReferenceListProperty(sensor1_x, sensor1_y)
    sensor2_x = NumericProperty(0)
    sensor2_y = NumericProperty(0)
    sensor2 = ReferenceListProperty(sensor2_x, sensor2_y)
    sensor3_x = NumericProperty(0)
    sensor3_y = NumericProperty(0)
    sensor3 = ReferenceListProperty(sensor3_x, sensor3_y)
    signal1 = NumericProperty(0)
    signal2 = NumericProperty(0)
    signal3 = NumericProperty(0)

    def move(self, rotation):
        self.pos = Vector(*self.velocity) + self.pos
        self.rotation = rotation
        self.angle = self.angle + self.rotation
        self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos
        self.sensor2 = Vector(30, 0).rotate((self.angle+30)%360) + self.pos
        self.sensor3 = Vector(30, 0).rotate((self.angle-30)%360) + self.pos
        self.signal1 = int(np.sum(sand[int(self.sensor1_x)-10:int(self.sensor1_x)+10, int(self.sensor1_y)-10:int(self.sensor1_y)+10]))/400.
        self.signal2 = int(np.sum(sand[int(self.sensor2_x)-10:int(self.sensor2_x)+10, int(self.sensor2_y)-10:int(self.sensor2_y)+10]))/400.
        self.signal3 = int(np.sum(sand[int(self.sensor3_x)-10:int(self.sensor3_x)+10, int(self.sensor3_y)-10:int(self.sensor3_y)+10]))/400.
        if self.sensor1_x>longueur-10 or self.sensor1_x<10 or self.sensor1_y>largeur-10 or self.sensor1_y<10:
            self.signal1 = 1.
        if self.sensor2_x>longueur-10 or self.sensor2_x<10 or self.sensor2_y>largeur-10 or self.sensor2_y<10:
            self.signal2 = 1.
        if self.sensor3_x>longueur-10 or self.sensor3_x<10 or self.sensor3_y>largeur-10 or self.sensor3_y<10:
            self.signal3 = 1.

class Ball1(Widget):
    pass
class Ball2(Widget):
    pass
class Ball3(Widget):
    pass

# Creating the game class

class Game(Widget):

    car = ObjectProperty(None)
    ball1 = ObjectProperty(None)
    ball2 = ObjectProperty(None)
    ball3 = ObjectProperty(None)

    def serve_car(self):
        self.car.center = self.center
        self.car.velocity = Vector(6, 0)

    def update(self, dt):

        global brain
        global last_reward
        global scores
        global last_distance
        global goal_x
        global goal_y
        global longueur
        global largeur
        global starting_time
        global duration_time

        longueur = self.width
        largeur = self.height
        if first_update:
            init()

        xx = goal_x - self.car.x
        yy = goal_y - self.car.y
        orientation = Vector(*self.car.velocity).angle((xx,yy))/180.
        last_signal = [self.car.signal1, self.car.signal2, self.car.signal3,
                       orientation, -orientation, duration_time]
        action = brain.update(last_reward, last_signal)
        scores.append(brain.score())
        rotation = action2rotation[action]
        self.car.move(rotation)
        distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)
        self.ball1.pos = self.car.sensor1
        self.ball2.pos = self.car.sensor2
        self.ball3.pos = self.car.sensor3

        if sand[int(self.car.x),int(self.car.y)] > 0:
            self.car.velocity = Vector(1, 0).rotate(self.car.angle)
            last_reward = -1.5
        else: # otherwise
            self.car.velocity = Vector(6, 0).rotate(self.car.angle)
            last_reward = -0.1
            if distance < last_distance:
                last_reward = 0.1

        if self.car.x < 10:
            self.car.x = 10
            last_reward = -1
        if self.car.x > self.width - 10:
            self.car.x = self.width - 10
            last_reward = -1
        if self.car.y < 10:
            self.car.y = 10
            last_reward = -1
        if self.car.y > self.height - 10:
            self.car.y = self.height - 10
            last_reward = -1

        if distance < 100:
            goal_x = self.width-goal_x
            goal_y = self.height-goal_y
            last_reward = 2
            starting_time = default_timer()

        duration_time = default_timer() - starting_time
        if duration_time > 10:
            last_reward = last_reward - 0.2
        last_distance = distance

# Adding the painting tools

class MyPaintWidget(Widget):

    def on_touch_down(self, touch):
        global length, n_points, last_x, last_y
        with self.canvas:
            Color(0.8,0.7,0)
            d = 10.
            touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10)
            last_x = int(touch.x)
            last_y = int(touch.y)
            n_points = 0
            length = 0
            sand[int(touch.x),int(touch.y)] = 1

    def on_touch_move(self, touch):
        global length, n_points, last_x, last_y
        if touch.button == 'left':
            touch.ud['line'].points += [touch.x, touch.y]
            x = int(touch.x)
            y = int(touch.y)
            length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2))
            n_points += 1.
            density = n_points/(length)
            touch.ud['line'].width = int(20 * density + 1)
            sand[int(touch.x) - 10 : int(touch.x) + 10, int(touch.y) - 10 : int(touch.y) + 10] = 1
            last_x = x
            last_y = y

# Adding the API Buttons (clear, save and load)

class CarApp(App):

    def build(self):
        parent = Game()
        parent.serve_car()
        Clock.schedule_interval(parent.update, 1.0/60.0)
        self.painter = MyPaintWidget()
        clearbtn = Button(text = 'clear')
        savebtn = Button(text = 'save', pos = (parent.width, 0))
        loadbtn = Button(text = 'load', pos = (2 * parent.width, 0))
        clearbtn.bind(on_release = self.clear_canvas)
        savebtn.bind(on_release = self.save)
        loadbtn.bind(on_release = self.load)
        parent.add_widget(self.painter)
        parent.add_widget(clearbtn)
        parent.add_widget(savebtn)
        parent.add_widget(loadbtn)
        return parent

    def clear_canvas(self, obj):
        global sand
        self.painter.canvas.clear()
        sand = np.zeros((longueur,largeur))

    def save(self, obj):
        print("saving brain...")
        brain.save()
        plt.plot(scores)
        plt.show()

    def load(self, obj):
        print("loading last saved brain...")
        brain.load()

# Running the whole thing
if __name__ == '__main__':
    CarApp().run()
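Compared with map.py, map2.py feeds the elapsed trip time (duration_time) to the network as a sixth input and shapes the reward: reaching a goal yields +2 and restarts the timer, and any step taken after 10 seconds on the same trip loses an extra 0.2. A condensed sketch of that shaping rule, with hypothetical names:

from timeit import default_timer

def shape_reward(base_reward, reached_goal, starting_time, time_limit=10.0):
    reward = base_reward
    if reached_goal:
        reward = 2.0
        starting_time = default_timer()        # restart the trip timer
    duration_time = default_timer() - starting_time
    if duration_time > time_limit:
        reward -= 0.2
    return reward, starting_time, duration_time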
@@ -0,0 +1,236 @@
|
||||
# Self Driving Car
|
||||
|
||||
# Importing the libraries
|
||||
import numpy as np
|
||||
from random import random, randint
|
||||
import matplotlib.pyplot as plt
|
||||
import time
|
||||
|
||||
# Importing the Kivy packages
|
||||
from kivy.app import App
|
||||
from kivy.uix.widget import Widget
|
||||
from kivy.uix.button import Button
|
||||
from kivy.graphics import Color, Ellipse, Line
|
||||
from kivy.config import Config
|
||||
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
|
||||
from kivy.vector import Vector
|
||||
from kivy.clock import Clock
|
||||
|
||||
# Importing the Dqn object from our AI in ai.py
|
||||
from ai import Dqn
|
||||
|
||||
# Adding this line if we don't want the right click to put a red point
|
||||
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
|
||||
|
||||
# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
|
||||
last_x = 0
|
||||
last_y = 0
|
||||
n_points = 0 # the total number of points in the last drawing
|
||||
length = 0 # the length of the last drawing
|
||||
|
||||
# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
|
||||
brain = Dqn(5,3,0.9) # 5 sensors, 3 actions, gama = 0.9
|
||||
action2rotation = [0,20,-20] # action = 0 => no rotation, action = 1 => rotate 20 degres, action = 2 => rotate -20 degres
|
||||
last_reward = 0 # initializing the last reward
|
||||
scores = [] # initializing the mean score curve (sliding window of the rewards) with respect to time
|
||||
|
||||
# Initializing the map
|
||||
first_update = True # using this trick to initialize the map only once
|
||||
def init():
    global sand # sand is an array that has as many cells as our graphic interface has pixels. Each cell has a one if there is sand, 0 otherwise.
    global goal_x # x-coordinate of the goal (where the car has to go, that is the airport or the downtown)
    global goal_y # y-coordinate of the goal (where the car has to go, that is the airport or the downtown)
    global first_update # declared global so that the flag below flips the module-level variable and the map is initialized only once
    sand = np.zeros((longueur,largeur)) # initializing the sand array with only zeros
    goal_x = 20 # the goal to reach is at the upper left of the map (the x-coordinate is 20 and not 0 because the car gets bad reward if it touches the wall)
    goal_y = largeur - 20 # the goal to reach is at the upper left of the map (y-coordinate)
    first_update = False # trick to initialize the map only once
|
||||
|
||||
# Initializing the last distance
|
||||
last_distance = 0
|
||||
|
||||
# Creating the car class (to understand "NumericProperty" and "ReferenceListProperty", see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
|
||||
|
||||
class Car(Widget):
|
||||
|
||||
angle = NumericProperty(0) # initializing the angle of the car (angle between the x-axis of the map and the axis of the car)
|
||||
rotation = NumericProperty(0) # initializing the last rotation of the car (after playing the action, the car does a rotation of 0, 20 or -20 degrees)
|
||||
velocity_x = NumericProperty(0) # initializing the x-coordinate of the velocity vector
|
||||
velocity_y = NumericProperty(0) # initializing the y-coordinate of the velocity vector
|
||||
velocity = ReferenceListProperty(velocity_x, velocity_y) # velocity vector
|
||||
sensor1_x = NumericProperty(0) # initializing the x-coordinate of the first sensor (the one that looks forward)
|
||||
sensor1_y = NumericProperty(0) # initializing the y-coordinate of the first sensor (the one that looks forward)
|
||||
sensor1 = ReferenceListProperty(sensor1_x, sensor1_y) # first sensor vector
|
||||
sensor2_x = NumericProperty(0) # initializing the x-coordinate of the second sensor (the one that looks 30 degrees to the left)
|
||||
sensor2_y = NumericProperty(0) # initializing the y-coordinate of the second sensor (the one that looks 30 degrees to the left)
|
||||
sensor2 = ReferenceListProperty(sensor2_x, sensor2_y) # second sensor vector
|
||||
sensor3_x = NumericProperty(0) # initializing the x-coordinate of the third sensor (the one that looks 30 degrees to the right)
|
||||
sensor3_y = NumericProperty(0) # initializing the y-coordinate of the third sensor (the one that looks 30 degrees to the right)
|
||||
sensor3 = ReferenceListProperty(sensor3_x, sensor3_y) # third sensor vector
|
||||
signal1 = NumericProperty(0) # initializing the signal received by sensor 1
|
||||
signal2 = NumericProperty(0) # initializing the signal received by sensor 2
|
||||
signal3 = NumericProperty(0) # initializing the signal received by sensor 3
|
||||
|
||||
def move(self, rotation):
|
||||
self.pos = Vector(*self.velocity) + self.pos # updating the position of the car according to its last position and velocity
|
||||
self.rotation = rotation # getting the rotation of the car
|
||||
self.angle = self.angle + self.rotation # updating the angle
|
||||
self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos # updating the position of sensor 1
|
||||
self.sensor2 = Vector(30, 0).rotate((self.angle+30)%360) + self.pos # updating the position of sensor 2
|
||||
self.sensor3 = Vector(30, 0).rotate((self.angle-30)%360) + self.pos # updating the position of sensor 3
|
||||
self.signal1 = int(np.sum(sand[int(self.sensor1_x)-10:int(self.sensor1_x)+10, int(self.sensor1_y)-10:int(self.sensor1_y)+10]))/400. # getting the signal received by sensor 1 (density of sand around sensor 1)
|
||||
self.signal2 = int(np.sum(sand[int(self.sensor2_x)-10:int(self.sensor2_x)+10, int(self.sensor2_y)-10:int(self.sensor2_y)+10]))/400. # getting the signal received by sensor 2 (density of sand around sensor 2)
|
||||
self.signal3 = int(np.sum(sand[int(self.sensor3_x)-10:int(self.sensor3_x)+10, int(self.sensor3_y)-10:int(self.sensor3_y)+10]))/400. # getting the signal received by sensor 3 (density of sand around sensor 3)
|
||||
if self.sensor1_x > longueur-10 or self.sensor1_x<10 or self.sensor1_y>largeur-10 or self.sensor1_y<10: # if sensor 1 is out of the map (the car is facing one edge of the map)
|
||||
self.signal1 = 1. # sensor 1 detects full sand
|
||||
if self.sensor2_x > longueur-10 or self.sensor2_x<10 or self.sensor2_y>largeur-10 or self.sensor2_y<10: # if sensor 2 is out of the map (the car is facing one edge of the map)
|
||||
self.signal2 = 1. # sensor 2 detects full sand
|
||||
if self.sensor3_x > longueur-10 or self.sensor3_x<10 or self.sensor3_y>largeur-10 or self.sensor3_y<10: # if sensor 3 is out of the map (the car is facing one edge of the map)
|
||||
self.signal3 = 1. # sensor 3 detects full sand
|
||||
|
||||
class Ball1(Widget): # sensor 1 (see kivy tutorials: kivy https://kivy.org/docs/tutorials/pong.html)
|
||||
pass
|
||||
class Ball2(Widget): # sensor 2 (see kivy tutorials: kivy https://kivy.org/docs/tutorials/pong.html)
|
||||
pass
|
||||
class Ball3(Widget): # sensor 3 (see kivy tutorials: kivy https://kivy.org/docs/tutorials/pong.html)
|
||||
pass
|
||||
|
||||
# Creating the game class (to understand "ObjectProperty", see kivy tutorials: kivy https://kivy.org/docs/tutorials/pong.html)
|
||||
|
||||
class Game(Widget):
|
||||
|
||||
car = ObjectProperty(None) # getting the car object from our kivy file
|
||||
ball1 = ObjectProperty(None) # getting the sensor 1 object from our kivy file
|
||||
ball2 = ObjectProperty(None) # getting the sensor 2 object from our kivy file
|
||||
ball3 = ObjectProperty(None) # getting the sensor 3 object from our kivy file
|
||||
|
||||
def serve_car(self): # starting the car when we launch the application
|
||||
self.car.center = self.center # the car will start at the center of the map
|
||||
self.car.velocity = Vector(6, 0) # the car will start to go horizontally to the right with a speed of 6
|
||||
|
||||
def update(self, dt): # the big update function that updates everything that needs to be updated at each discrete time t when reaching a new state (getting new signals from the sensors)
|
||||
|
||||
global brain # specifying the global variables (the brain of the car, that is our AI)
|
||||
global last_reward # specifying the global variables (the last reward)
|
||||
global scores # specifying the global variables (the means of the rewards)
|
||||
global last_distance # specifying the global variables (the last distance from the car to the goal)
|
||||
global goal_x # specifying the global variables (x-coordinate of the goal)
|
||||
global goal_y # specifying the global variables (y-coordinate of the goal)
|
||||
global longueur # specifying the global variables (width of the map)
|
||||
global largeur # specifying the global variables (height of the map)
|
||||
|
||||
longueur = self.width # width of the map (horizontal edge)
|
||||
largeur = self.height # height of the map (vertical edge)
|
||||
if first_update: # trick to initialize the map only once
|
||||
init()
|
||||
|
||||
xx = goal_x - self.car.x # difference of x-coordinates between the goal and the car
|
||||
yy = goal_y - self.car.y # difference of y-coordinates between the goal and the car
|
||||
orientation = Vector(*self.car.velocity).angle((xx,yy))/180. # direction of the car with respect to the goal (if the car is heading perfectly towards the goal, then orientation = 0)
|
||||
last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation, -orientation] # our input state vector, composed of the three signals received by the three sensors, plus the orientation and -orientation
|
||||
action = brain.update(last_reward, last_signal) # playing the action from our ai (the object brain of the dqn class)
|
||||
scores.append(brain.score()) # appending the score (mean of the last 100 rewards to the reward window)
|
||||
rotation = action2rotation[action] # converting the action played (0, 1 or 2) into the rotation angle (0°, 20° or -20°)
|
||||
self.car.move(rotation) # moving the car according to this last rotation angle
|
||||
distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2) # getting the new distance between the car and the goal right after the car moved
|
||||
self.ball1.pos = self.car.sensor1 # updating the position of the first sensor (ball1) right after the car moved
|
||||
self.ball2.pos = self.car.sensor2 # updating the position of the second sensor (ball2) right after the car moved
|
||||
self.ball3.pos = self.car.sensor3 # updating the position of the third sensor (ball3) right after the car moved
|
||||
|
||||
if sand[int(self.car.x),int(self.car.y)] > 0: # if the car is on the sand
|
||||
self.car.velocity = Vector(1, 0).rotate(self.car.angle) # it is slowed down (speed = 1)
|
||||
last_reward = -1 # and reward = -1
|
||||
else: # otherwise
|
||||
self.car.velocity = Vector(6, 0).rotate(self.car.angle) # it goes to a normal speed (speed = 6)
|
||||
last_reward = -0.2 # and it gets bad reward (-0.2)
|
||||
if distance < last_distance: # however if it getting close to the goal
|
||||
last_reward = 0.1 # it still gets slightly positive reward 0.1
|
||||
|
||||
if self.car.x < 10: # if the car is in the left edge of the frame
|
||||
self.car.x = 10 # it is not slowed down
|
||||
last_reward = -1 # but it gets bad reward -1
|
||||
if self.car.x > self.width-10: # if the car is in the right edge of the frame
|
||||
self.car.x = self.width-10 # it is not slowed down
|
||||
last_reward = -1 # but it gets bad reward -1
|
||||
if self.car.y < 10: # if the car is in the bottom edge of the frame
|
||||
self.car.y = 10 # it is not slowed down
|
||||
last_reward = -1 # but it gets bad reward -1
|
||||
if self.car.y > self.height-10: # if the car is in the upper edge of the frame
|
||||
self.car.y = self.height-10 # it is not slowed down
|
||||
last_reward = -1 # but it gets bad reward -1
|
||||
|
||||
if distance < 100: # when the car reaches its goal
|
||||
goal_x = self.width - goal_x # the goal becomes the bottom right corner of the map (the downtown), and vice versa (updating of the x-coordinate of the goal)
|
||||
goal_y = self.height - goal_y # the goal becomes the bottom right corner of the map (the downtown), and vice versa (updating of the y-coordinate of the goal)
|
||||
|
||||
# Updating the last distance from the car to the goal
|
||||
last_distance = distance
|
||||
|
||||
# Painting for graphic interface (see kivy tutorials: https://kivy.org/docs/tutorials/firstwidget.html)
|
||||
|
||||
class MyPaintWidget(Widget):
|
||||
|
||||
def on_touch_down(self, touch): # putting some sand when we do a left click
|
||||
global length,n_points,last_x,last_y
|
||||
with self.canvas:
|
||||
Color(0.8,0.7,0)
|
||||
d=10.
|
||||
touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10)
|
||||
last_x = int(touch.x)
|
||||
last_y = int(touch.y)
|
||||
n_points = 0
|
||||
length = 0
|
||||
sand[int(touch.x),int(touch.y)] = 1
|
||||
|
||||
def on_touch_move(self, touch): # putting some sand when we move the mouse while pressing left
|
||||
global length,n_points,last_x,last_y
|
||||
if touch.button=='left':
|
||||
touch.ud['line'].points += [touch.x, touch.y]
|
||||
x = int(touch.x)
|
||||
y = int(touch.y)
|
||||
length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2))
|
||||
n_points += 1.
|
||||
density = n_points/(length)
|
||||
touch.ud['line'].width = int(20*density + 1)
|
||||
sand[int(touch.x) - 10 : int(touch.x) + 10, int(touch.y) - 10 : int(touch.y) + 10] = 1
|
||||
last_x = x
|
||||
last_y = y
|
||||
|
||||
# API and switches interface (see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
|
||||
|
||||
class CarApp(App):
|
||||
|
||||
def build(self): # building the app
|
||||
parent = Game()
|
||||
parent.serve_car()
|
||||
Clock.schedule_interval(parent.update, 1.0 / 60.0)
|
||||
self.painter = MyPaintWidget()
|
||||
clearbtn = Button(text='clear')
|
||||
savebtn = Button(text='save',pos=(parent.width,0))
|
||||
loadbtn = Button(text='load',pos=(2*parent.width,0))
|
||||
clearbtn.bind(on_release=self.clear_canvas)
|
||||
savebtn.bind(on_release=self.save)
|
||||
loadbtn.bind(on_release=self.load)
|
||||
parent.add_widget(self.painter)
|
||||
parent.add_widget(clearbtn)
|
||||
parent.add_widget(savebtn)
|
||||
parent.add_widget(loadbtn)
|
||||
return parent
|
||||
|
||||
def clear_canvas(self, obj): # clear button
|
||||
global sand
|
||||
self.painter.canvas.clear()
|
||||
sand = np.zeros((longueur,largeur))
|
||||
|
||||
def save(self, obj): # save button
|
||||
print("saving brain...")
|
||||
brain.save()
|
||||
plt.plot(scores)
|
||||
plt.show()
|
||||
|
||||
def load(self, obj): # load button
|
||||
print("loading last saved brain...")
|
||||
brain.load()
|
||||
|
||||
# Running the app
|
||||
if __name__ == '__main__':
|
||||
CarApp().run()
|
Intelligence Artificielle de A a Z/2. Doom/ai.py (new file, 155 lines)
@@ -0,0 +1,155 @@
# AI for Doom

# Importing the libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# Importing the packages for OpenAI and Doom
import gym
from gym import wrappers
import vizdoomgym

# Importing the other Python files
import experience_replay, image_preprocessing

# Part 1 - Building the AI

# Making the brain

class CNN(nn.Module):

    def __init__(self, number_actions):
        super(CNN, self).__init__()
        self.convolution1 = nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size = 5)
        self.convolution2 = nn.Conv2d(in_channels = 32, out_channels = 32, kernel_size = 3)
        self.convolution3 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 2)
        self.fc1 = nn.Linear(in_features = self.count_neurons((1, 80, 80)), out_features = 40)
        self.fc2 = nn.Linear(in_features = 40, out_features = number_actions)

    def count_neurons(self, image_dim):
        x = Variable(torch.rand(1, *image_dim))
        x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2))
        return x.data.view(1, -1).size(1)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Making the body

class SoftmaxBody(nn.Module):

    def __init__(self, T):
        super(SoftmaxBody, self).__init__()
        self.T = T

    def forward(self, outputs):
        probs = F.softmax(outputs * self.T)
        actions = probs.multinomial(num_samples=1)
        return actions

# Making the AI

class AI:

    def __init__(self, brain, body):
        self.brain = brain
        self.body = body

    def __call__(self, inputs):
        input = Variable(torch.from_numpy(np.array(inputs, dtype = np.float32)))
        output = self.brain(input)
        actions = self.body(output)
        return actions.data.numpy()

# Part 2 - Training the AI with Deep Convolutional Q-Learning

# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(gym.make("VizdoomCorridor-v0"), width=80, height=80, grayscale=True)
doom_env = wrappers.Monitor(doom_env, "videos", force = True)
number_actions = doom_env.action_space.n

# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T = 1.0)
ai = AI(brain = cnn, body = softmax_body)

# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env = doom_env, ai = ai, n_step = 10)
memory = experience_replay.ReplayMemory(n_steps = n_steps, capacity = 10000)

# Implementing Eligibility Trace
def eligibility_trace(batch):
    gamma = 0.99
    inputs = []
    targets = []
    for series in batch:
        input = Variable(torch.from_numpy(np.array([series[0].state, series[-1].state], dtype = np.float32)))
        output = cnn(input)
        cumul_reward = 0.0 if series[-1].done else output[1].data.max()
        for step in reversed(series[:-1]):
            cumul_reward = step.reward + gamma * cumul_reward
        state = series[0].state
        target = output[0].data
        target[series[0].action] = cumul_reward
        inputs.append(state)
        targets.append(target)
    return torch.from_numpy(np.array(inputs, dtype = np.float32)), torch.stack(targets)

# Making the moving average on 100 steps
class MA:
    def __init__(self, size):
        self.list_of_rewards = []
        self.size = size
    def add(self, rewards):
        if isinstance(rewards, list):
            self.list_of_rewards += rewards
        else:
            self.list_of_rewards.append(rewards)
        while len(self.list_of_rewards) > self.size:
            del self.list_of_rewards[0]
    def average(self):
        return np.mean(self.list_of_rewards)
ma = MA(100)

# Training the AI
loss = nn.MSELoss()
optimizer = optim.Adam(cnn.parameters(), lr = 0.001)
nb_epochs = 100
for epoch in range(1, nb_epochs + 1):
    memory.run_steps(200)
    for batch in memory.sample_batch(128):
        inputs, targets = eligibility_trace(batch)
        inputs, targets = Variable(inputs), Variable(targets)
        predictions = cnn(inputs)
        loss_error = loss(predictions, targets)
        optimizer.zero_grad()
        loss_error.backward()
        optimizer.step()
    rewards_steps = n_steps.rewards_steps()
    ma.add(rewards_steps)
    avg_reward = ma.average()
    print("Epoch: %s, Average Reward: %s" % (str(epoch), str(avg_reward)))
    if avg_reward >= 1500:
        print("Congratulations, your AI wins")
        break

# Closing the Doom environment
doom_env.close()
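eligibility_trace builds an n-step Q-learning target for the first transition of each sampled series: it starts from max_a Q(s_t+n, a) (or 0 if the series ended) and folds the intermediate rewards back with gamma, i.e. target = r_t + gamma*r_t+1 + ... + gamma^(n-1)*r_t+n-1 + gamma^n * max_a Q(s_t+n, a). A quick numeric check of that fold with made-up rewards:

gamma = 0.99
rewards = [1.0, 0.0, 2.0]    # r_t, r_t+1, r_t+2 (hypothetical)
bootstrap = 5.0              # stand-in for max_a Q(s_t+3, a)

cumul = bootstrap
for r in reversed(rewards):
    cumul = r + gamma * cumul    # same recursion as in eligibility_trace
print(round(cumul, 4))           # 7.8117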
@@ -0,0 +1,75 @@
# Experience Replay

# Importing the libraries
import numpy as np
from collections import namedtuple, deque

# Defining one Step
Step = namedtuple('Step', ['state', 'action', 'reward', 'done'])

# Making the AI progress on several (n_step) steps

class NStepProgress:

    def __init__(self, env, ai, n_step):
        self.ai = ai
        self.rewards = []
        self.env = env
        self.n_step = n_step

    def __iter__(self):
        state = self.env.reset()
        history = deque()
        reward = 0.0
        while True:
            self.env.render()
            action = self.ai(np.array([state]))[0][0]
            next_state, r, is_done, _ = self.env.step(action)
            reward += r
            history.append(Step(state = state, action = action, reward = r, done = is_done))
            while len(history) > self.n_step + 1:
                history.popleft()
            if len(history) == self.n_step + 1:
                yield tuple(history)
            state = next_state
            if is_done:
                if len(history) > self.n_step + 1:
                    history.popleft()
                while len(history) >= 1:
                    yield tuple(history)
                    history.popleft()
                self.rewards.append(reward)
                reward = 0.0
                state = self.env.reset()
                history.clear()

    def rewards_steps(self):
        rewards_steps = self.rewards
        self.rewards = []
        return rewards_steps

# Implementing Experience Replay

class ReplayMemory:

    def __init__(self, n_steps, capacity = 10000):
        self.capacity = capacity
        self.n_steps = n_steps
        self.n_steps_iter = iter(n_steps)
        self.buffer = deque()

    def sample_batch(self, batch_size): # creates an iterator that returns random batches
        ofs = 0
        vals = list(self.buffer)
        np.random.shuffle(vals)
        while (ofs+1)*batch_size <= len(self.buffer):
            yield vals[ofs*batch_size:(ofs+1)*batch_size]
            ofs += 1

    def run_steps(self, samples):
        while samples > 0:
            entry = next(self.n_steps_iter) # 10 consecutive steps
            self.buffer.append(entry) # we put 200 for the current episode
            samples -= 1
        while len(self.buffer) > self.capacity: # we accumulate no more than the capacity (10000)
            self.buffer.popleft()
@@ -0,0 +1,28 @@
# Image Preprocessing

# Importing the libraries
import numpy as np
from scipy.misc import imresize
from gym.core import ObservationWrapper
from gym.spaces.box import Box

# Preprocessing the Images

class PreprocessImage(ObservationWrapper):

    def __init__(self, env, height = 64, width = 64, grayscale = True, crop = lambda img: img):
        super(PreprocessImage, self).__init__(env)
        self.img_size = (height, width)
        self.grayscale = grayscale
        self.crop = crop
        n_colors = 1 if self.grayscale else 3
        self.observation_space = Box(0.0, 1.0, [n_colors, height, width])

    def _observation(self, img):
        img = self.crop(img)
        img = imresize(img, self.img_size)
        if self.grayscale:
            img = img.mean(-1, keepdims = True)
        img = np.transpose(img, (2, 0, 1))
        img = img.astype('float32') / 255.
        return img
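Note: scipy.misc.imresize was removed in SciPy 1.3, so this module only runs against an old SciPy. A hedged drop-in for that single call, using cv2 (already required by the Breakout code below), could be:

import cv2

def imresize(img, size):
    # size is (height, width); cv2.resize takes (width, height)
    return cv2.resize(img, (size[1], size[0]), interpolation=cv2.INTER_AREA)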
Intelligence Artificielle de A a Z/3. Breakout/Code_No_Comment/.DS_Store (new binary file, not shown)
@@ -0,0 +1,67 @@
# Improvement of the Gym environment with universe

import cv2
import gym
import numpy as np
from gym.spaces.box import Box
from gym import wrappers

# Taken from https://github.com/openai/universe-starter-agent

def create_atari_env(env_id, video=False):
    env = gym.make(env_id)
    if video:
        env = wrappers.Monitor(env, 'test', force=True)
    env = MyAtariRescale42x42(env)
    env = MyNormalizedEnv(env)
    return env

def _process_frame42(frame):
    frame = frame[34:34 + 160, :160]
    # Resize by half, then down to 42x42 (essentially mipmapping). If
    # we resize directly we lose pixels that, when mapped to 42x42,
    # aren't close enough to the pixel boundary.
    frame = cv2.resize(frame, (80, 80))
    frame = cv2.resize(frame, (42, 42))
    frame = frame.mean(2)
    frame = frame.astype(np.float32)
    frame *= (1.0 / 255.0)
    #frame = np.reshape(frame, [1, 42, 42])
    return frame

class MyAtariRescale42x42(gym.ObservationWrapper):

    def __init__(self, env=None):
        super(MyAtariRescale42x42, self).__init__(env)
        self.observation_space = Box(0.0, 1.0, [1, 42, 42])

    def _observation(self, observation):
        return _process_frame42(observation)

class MyNormalizedEnv(gym.ObservationWrapper):

    def __init__(self, env=None):
        super(MyNormalizedEnv, self).__init__(env)
        self.state_mean = 0
        self.state_std = 0
        self.alpha = 0.9999
        self.num_steps = 0

    def _observation(self, observation):
        self.num_steps += 1
        self.state_mean = self.state_mean * self.alpha + \
            observation.mean() * (1 - self.alpha)
        self.state_std = self.state_std * self.alpha + \
            observation.std() * (1 - self.alpha)

        unbiased_mean = self.state_mean / (1 - pow(self.alpha, self.num_steps))
        unbiased_std = self.state_std / (1 - pow(self.alpha, self.num_steps))

        ret = (observation - unbiased_mean) / (unbiased_std + 1e-8)
        return np.expand_dims(ret, axis=0)
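MyNormalizedEnv keeps exponential running estimates of the frame mean and std and divides them by (1 - alpha**num_steps), the same bias correction Adam applies to its moment estimates, so the very first frames are not normalized against estimates that are still near zero. A small numeric illustration with a constant, made-up observation value of 1.0:

alpha = 0.9999
state_mean = 0.0
for t in range(1, 11):
    state_mean = state_mean * alpha + 1.0 * (1 - alpha)
    unbiased = state_mean / (1 - alpha ** t)
print(round(state_mean, 6), round(unbiased, 6))   # 0.001 1.0 -> the corrected estimate is already right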
@@ -0,0 +1,43 @@
# Main code

from __future__ import print_function
import os
import torch
import torch.multiprocessing as mp
from envs import create_atari_env
from model import ActorCritic
from train import train
from test import test
import my_optim

# Gathering all the parameters (that we can modify to explore)
class Params():
    def __init__(self):
        self.lr = 0.0001
        self.gamma = 0.99
        self.tau = 1.
        self.seed = 1
        self.num_processes = 16
        self.num_steps = 20
        self.max_episode_length = 10000
        self.env_name = 'Breakout-v0'

# Main run
os.environ['OMP_NUM_THREADS'] = '1'
params = Params()
torch.manual_seed(params.seed)
env = create_atari_env(params.env_name)
shared_model = ActorCritic(env.observation_space.shape[0], env.action_space)
shared_model.share_memory()
optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=params.lr)
optimizer.share_memory()
processes = []
p = mp.Process(target=test, args=(params.num_processes, params, shared_model))
p.start()
processes.append(p)
for rank in range(0, params.num_processes):
    p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
    p.start()
    processes.append(p)
for p in processes:
    p.join()
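main.py starts one test process plus params.num_processes training workers that all share the same ActorCritic through share_memory() (the A3C layout). One hedged portability note: on platforms where torch.multiprocessing spawns processes instead of forking (Windows, recent macOS), the launch code should sit under a main guard, roughly:

def run():
    params = Params()
    # build env, shared_model, optimizer and start the processes as above

if __name__ == '__main__':
    run()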
@@ -0,0 +1,65 @@
# AI for Breakout

# Importing the libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Initializing and setting the variance of a tensor of weights
def normalized_columns_initializer(weights, std=1.0):
    out = torch.randn(weights.size())
    out *= std / torch.sqrt(out.pow(2).sum(1, True))
    return out

# Initializing the weights of the neural network in an optimal way for the learning
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = np.prod(weight_shape[1:4])
        fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]
        w_bound = np.sqrt(6. / (fan_in + fan_out))
        m.weight.data.uniform_(-w_bound, w_bound)
        m.bias.data.fill_(0)
    elif classname.find('Linear') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = weight_shape[1]
        fan_out = weight_shape[0]
        w_bound = np.sqrt(6. / (fan_in + fan_out))
        m.weight.data.uniform_(-w_bound, w_bound)
        m.bias.data.fill_(0)

# Making the A3C brain

class ActorCritic(torch.nn.Module):

    def __init__(self, num_inputs, action_space):
        super(ActorCritic, self).__init__()
        self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.lstm = nn.LSTMCell(32 * 3 * 3, 256)
        num_outputs = action_space.n
        self.critic_linear = nn.Linear(256, 1)
        self.actor_linear = nn.Linear(256, num_outputs)
        self.apply(weights_init)
        self.actor_linear.weight.data = normalized_columns_initializer(self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = normalized_columns_initializer(self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)
        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)
        self.train()

    def forward(self, inputs):
        inputs, (hx, cx) = inputs
        x = F.elu(self.conv1(inputs))
        x = F.elu(self.conv2(x))
        x = F.elu(self.conv3(x))
        x = F.elu(self.conv4(x))
        x = x.view(-1, 32 * 3 * 3)
        hx, cx = self.lstm(x, (hx, cx))
        x = hx
        return self.critic_linear(x), self.actor_linear(x), (hx, cx)
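The LSTMCell input size of 32 * 3 * 3 follows from the 42x42 frames produced by envs.py: each Conv2d with kernel 3, stride 2, padding 1 maps a spatial size n to (n + 2 - 3) // 2 + 1, so 42 -> 21 -> 11 -> 6 -> 3 across the four layers, leaving a 32 x 3 x 3 feature map. A quick check of that arithmetic:

def conv_out(n, kernel=3, stride=2, padding=1):
    return (n + 2 * padding - kernel) // stride + 1

n = 42
for _ in range(4):
    n = conv_out(n)
print(n, 32 * n * n)   # 3 288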
@@ -0,0 +1,46 @@
# Optimizer

import math
import torch
import torch.optim as optim

class SharedAdam(optim.Adam):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        super(SharedAdam, self).__init__(params, lr, betas, eps, weight_decay)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = p.data.new().resize_as_(p.data).zero_()
                state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_()

    def share_memory(self):
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()

    def step(self):
        loss = None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']
                state['step'] += 1
                if group['weight_decay'] != 0:
                    grad = grad.add(group['weight_decay'], p.data)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                denom = exp_avg_sq.sqrt().add_(group['eps'])
                bias_correction1 = 1 - beta1 ** state['step'][0]
                bias_correction2 = 1 - beta2 ** state['step'][0]
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
                p.data.addcdiv_(-step_size, exp_avg, denom)
        return loss
@ -0,0 +1,45 @@
|
||||
# Test Agent
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from envs import create_atari_env
|
||||
from model import ActorCritic
|
||||
from torch.autograd import Variable
|
||||
import time
|
||||
from collections import deque
|
||||
|
||||
def test(rank, params, shared_model):
|
||||
torch.manual_seed(params.seed + rank)
|
||||
env = create_atari_env(params.env_name, video=True)
|
||||
env.seed(params.seed + rank)
|
||||
model = ActorCritic(env.observation_space.shape[0], env.action_space)
|
||||
model.eval()
|
||||
state = env.reset()
|
||||
state = torch.from_numpy(state)
|
||||
reward_sum = 0
|
||||
done = True
|
||||
start_time = time.time()
|
||||
actions = deque(maxlen=100)
|
||||
episode_length = 0
|
||||
while True:
|
||||
episode_length += 1
|
||||
if done:
|
||||
model.load_state_dict(shared_model.state_dict())
|
||||
cx = Variable(torch.zeros(1, 256), volatile=True)
|
||||
hx = Variable(torch.zeros(1, 256), volatile=True)
|
||||
else:
|
||||
cx = Variable(cx.data, volatile=True)
|
||||
hx = Variable(hx.data, volatile=True)
|
||||
value, action_value, (hx, cx) = model((Variable(state.unsqueeze(0), volatile=True), (hx, cx)))
|
||||
prob = F.softmax(action_value)
|
||||
action = prob.max(1)[1].data.numpy()
|
||||
state, reward, done, _ = env.step(action[0])
|
||||
reward_sum += reward
|
||||
if done:
|
||||
print("Time {}, episode reward {}, episode length {}".format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)), reward_sum, episode_length))
|
||||
reward_sum = 0
|
||||
episode_length = 0
|
||||
actions.clear()
|
||||
state = env.reset()
|
||||
time.sleep(60)
|
||||
state = torch.from_numpy(state)
|
@ -0,0 +1,77 @@
|
||||
# Training the AI
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from envs import create_atari_env
|
||||
from model import ActorCritic
|
||||
from torch.autograd import Variable
|
||||
|
||||
def ensure_shared_grads(model, shared_model):
|
||||
for param, shared_param in zip(model.parameters(), shared_model.parameters()):
|
||||
if shared_param.grad is not None:
|
||||
return
|
||||
shared_param._grad = param.grad
|
||||
|
||||
def train(rank, params, shared_model, optimizer):
|
||||
torch.manual_seed(params.seed + rank)
|
||||
env = create_atari_env(params.env_name)
|
||||
env.seed(params.seed + rank)
|
||||
model = ActorCritic(env.observation_space.shape[0], env.action_space)
|
||||
state = env.reset()
|
||||
state = torch.from_numpy(state)
|
||||
done = True
|
||||
episode_length = 0
|
||||
while True:
|
||||
episode_length += 1
|
||||
model.load_state_dict(shared_model.state_dict())
|
||||
if done:
|
||||
cx = Variable(torch.zeros(1, 256))
|
||||
hx = Variable(torch.zeros(1, 256))
|
||||
else:
|
||||
cx = Variable(cx.data)
|
||||
hx = Variable(hx.data)
|
||||
values = []
|
||||
log_probs = []
|
||||
rewards = []
|
||||
entropies = []
|
||||
for step in range(params.num_steps):
|
||||
value, action_values, (hx, cx) = model((Variable(state.unsqueeze(0)), (hx, cx)))
|
||||
prob = F.softmax(action_values)
|
||||
log_prob = F.log_softmax(action_values)
|
||||
entropy = -(log_prob * prob).sum(1)
|
||||
entropies.append(entropy)
|
||||
action = prob.multinomial().data
|
||||
log_prob = log_prob.gather(1, Variable(action))
|
||||
values.append(value)
|
||||
log_probs.append(log_prob)
|
||||
state, reward, done, _ = env.step(action.numpy())
|
||||
done = (done or episode_length >= params.max_episode_length)
|
||||
reward = max(min(reward, 1), -1)
|
||||
if done:
|
||||
episode_length = 0
|
||||
state = env.reset()
|
||||
state = torch.from_numpy(state)
|
||||
rewards.append(reward)
|
||||
if done:
|
||||
break
|
||||
R = torch.zeros(1, 1)
|
||||
if not done:
|
||||
value, _, _ = model((Variable(state.unsqueeze(0)), (hx, cx)))
|
||||
R = value.data
|
||||
values.append(Variable(R))
|
||||
policy_loss = 0
|
||||
value_loss = 0
|
||||
R = Variable(R)
|
||||
gae = torch.zeros(1, 1)
|
||||
for i in reversed(range(len(rewards))):
|
||||
R = params.gamma * R + rewards[i]
|
||||
advantage = R - values[i]
|
||||
value_loss = value_loss + 0.5 * advantage.pow(2)
|
||||
TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data
|
||||
gae = gae * params.gamma * params.tau + TD
|
||||
policy_loss = policy_loss - log_probs[i] * Variable(gae) - 0.01 * entropies[i]
|
||||
optimizer.zero_grad()
|
||||
(policy_loss + 0.5 * value_loss).backward()
|
||||
torch.nn.utils.clip_grad_norm(model.parameters(), 40)
|
||||
ensure_shared_grads(model, shared_model)
|
||||
optimizer.step()
|
BIN
Intelligence Artificielle de A a Z/3. Breakout/Code_With_Comments/.DS_Store
vendored
Normal file
BIN
Intelligence Artificielle de A a Z/3. Breakout/Code_With_Comments/.DS_Store
vendored
Normal file
Binary file not shown.
@ -0,0 +1,67 @@
|
||||
# Improvement of the Gym environment with universe
|
||||
|
||||
|
||||
import cv2
|
||||
import gym
|
||||
import numpy as np
|
||||
from gym.spaces.box import Box
|
||||
from gym import wrappers
|
||||
|
||||
|
||||
# Taken from https://github.com/openai/universe-starter-agent
|
||||
|
||||
|
||||
def create_atari_env(env_id, video=False):
|
||||
env = gym.make(env_id)
|
||||
if video:
|
||||
env = wrappers.Monitor(env, 'test', force=True)
|
||||
env = MyAtariRescale42x42(env)
|
||||
env = MyNormalizedEnv(env)
|
||||
return env
|
||||
|
||||
|
||||
def _process_frame42(frame):
|
||||
frame = frame[34:34 + 160, :160]
|
||||
# Resize by half, then down to 42x42 (essentially mipmapping). If
|
||||
# we resize directly we lose pixels that, when mapped to 42x42,
|
||||
# aren't close enough to the pixel boundary.
|
||||
frame = cv2.resize(frame, (80, 80))
|
||||
frame = cv2.resize(frame, (42, 42))
|
||||
frame = frame.mean(2)
|
||||
frame = frame.astype(np.float32)
|
||||
frame *= (1.0 / 255.0)
|
||||
#frame = np.reshape(frame, [1, 42, 42])
|
||||
return frame
|
||||
|
||||
|
||||
class MyAtariRescale42x42(gym.ObservationWrapper):
|
||||
|
||||
def __init__(self, env=None):
|
||||
super(MyAtariRescale42x42, self).__init__(env)
|
||||
self.observation_space = Box(0.0, 1.0, [1, 42, 42])
|
||||
|
||||
def _observation(self, observation):
|
||||
return _process_frame42(observation)
|
||||
|
||||
|
||||
class MyNormalizedEnv(gym.ObservationWrapper):
|
||||
|
||||
def __init__(self, env=None):
|
||||
super(MyNormalizedEnv, self).__init__(env)
|
||||
self.state_mean = 0
|
||||
self.state_std = 0
|
||||
self.alpha = 0.9999
|
||||
self.num_steps = 0
|
||||
|
||||
def _observation(self, observation):
|
||||
self.num_steps += 1
|
||||
self.state_mean = self.state_mean * self.alpha + \
|
||||
observation.mean() * (1 - self.alpha)
|
||||
self.state_std = self.state_std * self.alpha + \
|
||||
observation.std() * (1 - self.alpha)
|
||||
|
||||
unbiased_mean = self.state_mean / (1 - pow(self.alpha, self.num_steps))
|
||||
unbiased_std = self.state_std / (1 - pow(self.alpha, self.num_steps))
|
||||
|
||||
ret = (observation - unbiased_mean) / (unbiased_std + 1e-8)
|
||||
return np.expand_dims(ret, axis=0)
|
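The `_observation` method above divides its running averages by `1 - alpha^t`; this startup bias correction (the same trick Adam uses for its moment estimates) undoes the pull toward the zero initialization. A small standalone sketch on made-up per-frame means (plain floats, hypothetical values, not part of the course code):

alpha = 0.9999
running_mean, t = 0.0, 0
for obs_mean in [100.0, 102.0, 98.0]:            # hypothetical per-frame pixel means
    t += 1
    running_mean = running_mean * alpha + obs_mean * (1 - alpha)
    unbiased = running_mean / (1 - alpha ** t)   # undoes the bias toward the 0 initialization
    print(round(unbiased, 2))                    # ~100.0, ~101.0, ~100.0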
@ -0,0 +1,43 @@
|
||||
# Main code
|
||||
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import torch
|
||||
import torch.multiprocessing as mp
|
||||
from envs import create_atari_env
|
||||
from model import ActorCritic
|
||||
from train import train
|
||||
from test import test
|
||||
import my_optim
|
||||
|
||||
# Gathering all the parameters (that we can modify to explore)
|
||||
class Params():
|
||||
def __init__(self):
|
||||
self.lr = 0.0001
|
||||
self.gamma = 0.99
|
||||
self.tau = 1.
|
||||
self.seed = 1
|
||||
self.num_processes = 16
|
||||
self.num_steps = 20
|
||||
self.max_episode_length = 10000
|
||||
self.env_name = 'Breakout-v0'
|
||||
|
||||
# Main run
|
||||
os.environ['OMP_NUM_THREADS'] = '1' # limiting each process to 1 OpenMP thread so the agents do not oversubscribe the CPU cores
|
||||
params = Params() # creating the params object from the Params class, that sets all the model parameters
|
||||
torch.manual_seed(params.seed) # setting the seed (not essential)
|
||||
env = create_atari_env(params.env_name) # we create an optimized environment thanks to universe
|
||||
shared_model = ActorCritic(env.observation_space.shape[0], env.action_space) # shared_model is the model shared by the different agents (different threads in different cores)
|
||||
shared_model.share_memory() # storing the model in the shared memory of the computer, which allows the threads to have access to this shared memory even if they are in different cores
|
||||
optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=params.lr) # the optimizer is also shared because it acts on the shared model
|
||||
optimizer.share_memory() # same, we store the optimizer in the shared memory so that all the agents can have access to this shared memory to optimize the model
|
||||
processes = [] # initializing the processes with an empty list
|
||||
p = mp.Process(target=test, args=(params.num_processes, params, shared_model)) # creating the 'test' process with the arguments 'args' passed to the 'test' target function - the 'test' process doesn't update the shared model, it only evaluates it - torch.multiprocessing.Process runs a function in an independent process
|
||||
p.start() # starting the created process p
|
||||
processes.append(p) # adding the created process p to the list of processes
|
||||
for rank in range(0, params.num_processes): # making a loop to run all the other processes that will be trained by updating the shared model
|
||||
p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
|
||||
p.start()
|
||||
processes.append(p)
|
||||
for p in processes: # joining each process so that the main program waits for all the agents to finish before exiting, which allows the program to stop safely
|
||||
p.join()
|
@ -0,0 +1,65 @@
|
||||
# AI for Breakout
|
||||
|
||||
# Importing the libraries
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
# Initializing and setting the variance of a tensor of weights
|
||||
def normalized_columns_initializer(weights, std=1.0):
|
||||
out = torch.randn(weights.size())
|
||||
out *= std / torch.sqrt(out.pow(2).sum(1, True)) # thanks to this initialization, we have var(out) = std^2
|
||||
return out
|
||||
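# A hypothetical sanity check of the initializer above (not part of the course code): every row
# of the returned tensor ends up with an L2 norm equal to std, e.g.
#   w = normalized_columns_initializer(torch.randn(6, 256), std=0.01)
#   print(w.pow(2).sum(1).sqrt())   # ~0.01 for each of the 6 rows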
|
||||
# Initializing the weights of the neural network in an optimal way for the learning
|
||||
def weights_init(m):
|
||||
classname = m.__class__.__name__ # python trick that will look for the type of connection in the object "m" (convolution or full connection)
|
||||
if classname.find('Conv') != -1: # if the connection is a convolution
|
||||
weight_shape = list(m.weight.data.size()) # list containing the shape of the weights in the object "m"
|
||||
fan_in = np.prod(weight_shape[1:4]) # dim1 * dim2 * dim3
|
||||
fan_out = np.prod(weight_shape[2:4]) * weight_shape[0] # dim0 * dim2 * dim3
|
||||
w_bound = np.sqrt(6. / (fan_in + fan_out)) # weight bound
|
||||
m.weight.data.uniform_(-w_bound, w_bound) # generating some random weights of order inversely proportional to the size of the tensor of weights
|
||||
m.bias.data.fill_(0) # initializing all the bias with zeros
|
||||
elif classname.find('Linear') != -1: # if the connection is a full connection
|
||||
weight_shape = list(m.weight.data.size()) # list containing the shape of the weights in the object "m"
|
||||
fan_in = weight_shape[1] # dim1
|
||||
fan_out = weight_shape[0] # dim0
|
||||
w_bound = np.sqrt(6. / (fan_in + fan_out)) # weight bound
|
||||
m.weight.data.uniform_(-w_bound, w_bound) # generating some random weights of order inversely proportional to the size of the tensor of weights
|
||||
m.bias.data.fill_(0) # initializing all the bias with zeros
|
||||
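# Worked example of the bound above for the first convolution defined below (assuming num_inputs = 1,
# as with the 1x42x42 frames, and 32 filters of size 3x3): fan_in = 1*3*3 = 9, fan_out = 3*3*32 = 288,
# so w_bound = sqrt(6 / (9 + 288)) is about 0.142 and the weights are drawn from U(-0.142, 0.142)
# (the Xavier/Glorot uniform initialization).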
|
||||
# Making the A3C brain
|
||||
|
||||
class ActorCritic(torch.nn.Module):
|
||||
|
||||
def __init__(self, num_inputs, action_space):
|
||||
super(ActorCritic, self).__init__()
|
||||
self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1) # first convolution
|
||||
self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1) # second convolution
|
||||
self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1) # third convolution
|
||||
self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1) # fourth convolution
|
||||
self.lstm = nn.LSTMCell(32 * 3 * 3, 256) # making an LSTM (Long Short Term Memory) cell to learn the temporal properties of the input - it outputs an encoded vector of size 256 that represents the current event of the game
|
||||
num_outputs = action_space.n # getting the number of possible actions
|
||||
self.critic_linear = nn.Linear(256, 1) # full connection of the critic: output = V(S)
|
||||
self.actor_linear = nn.Linear(256, num_outputs) # full connection of the actor: output = Q(S,A)
|
||||
self.apply(weights_init) # initializing the weights of the model with random weights
|
||||
self.actor_linear.weight.data = normalized_columns_initializer(self.actor_linear.weight.data, 0.01) # setting the standard deviation of the actor tensor of weights to 0.01
|
||||
self.actor_linear.bias.data.fill_(0) # initializing the actor bias with zeros
|
||||
self.critic_linear.weight.data = normalized_columns_initializer(self.critic_linear.weight.data, 1.0) # setting the standard deviation of the critic tensor of weights to 1.0
|
||||
self.critic_linear.bias.data.fill_(0) # initializing the critic bias with zeros
|
||||
self.lstm.bias_ih.data.fill_(0) # initializing the lstm bias with zeros
|
||||
self.lstm.bias_hh.data.fill_(0) # initializing the lstm bias with zeros
|
||||
self.train() # setting the module in "train" mode to activate layers such as dropout and batch normalization (none are used here, but it is good practice)
|
||||
|
||||
def forward(self, inputs):
|
||||
inputs, (hx, cx) = inputs # separating the input images from the tuple (hidden states, cell states)
|
||||
x = F.elu(self.conv1(inputs)) # forward propagating the signal from the input images to the 1st convolutional layer
|
||||
x = F.elu(self.conv2(x)) # forward propagating the signal from the 1st convolutional layer to the 2nd convolutional layer
|
||||
x = F.elu(self.conv3(x)) # forward propagating the signal from the 2nd convolutional layer to the 3rd convolutional layer
|
||||
x = F.elu(self.conv4(x)) # forward propagating the signal from the 3rd convolutional layer to the 4th convolutional layer
|
||||
x = x.view(-1, 32 * 3 * 3) # flattening the last convolutional layer into this 1D vector x
|
||||
hx, cx = self.lstm(x, (hx, cx)) # the LSTM takes as input x and the old hidden & cell states and outputs the new hidden & cell states
|
||||
x = hx # getting the useful output, which are the hidden states (principle of the LSTM)
|
||||
return self.critic_linear(x), self.actor_linear(x), (hx, cx) # returning the output of the critic (V(S)), the output of the actor (Q(S,A)), and the new hidden & cell states ((hx, cx))
|
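The flattening size 32 * 3 * 3 used above follows from the 1x42x42 frames produced by envs.py: each 3x3 convolution with stride 2 and padding 1 maps a spatial size n to (n - 1) // 2 + 1, so 42 -> 21 -> 11 -> 6 -> 3. A quick standalone check (a sketch written against a recent PyTorch, not part of the course files):

import torch
import torch.nn as nn
import torch.nn.functional as F

convs = nn.ModuleList([nn.Conv2d(1 if i == 0 else 32, 32, 3, stride=2, padding=1) for i in range(4)])
x = torch.zeros(1, 1, 42, 42)   # a dummy black frame shaped like the envs.py output
for conv in convs:
    x = F.elu(conv(x))
print(x.shape)                  # torch.Size([1, 32, 3, 3]), i.e. 288 values once flattened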
@ -0,0 +1,50 @@
|
||||
# Optimizer
|
||||
|
||||
import math
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
|
||||
# Implementing the Adam optimizer with shared states
|
||||
|
||||
class SharedAdam(optim.Adam): # object that inherits from optim.Adam
|
||||
|
||||
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
|
||||
super(SharedAdam, self).__init__(params, lr, betas, eps, weight_decay) # inheriting from the tools of optim.Adam
|
||||
for group in self.param_groups: # self.param_groups contains all the attributes of the optimizer, including the parameters to optimize (the weights of the network) contained in self.param_groups['params']
|
||||
for p in group['params']: # for each tensor p of weights to optimize
|
||||
state = self.state[p] # at the beginning, self.state is an empty dictionary so state = {} and self.state = {p:{}} = {p: state}
|
||||
state['step'] = torch.zeros(1) # counting the steps: state = {'step' : tensor([0])}
|
||||
state['exp_avg'] = p.data.new().resize_as_(p.data).zero_() # the update of the adam optimizer is based on an exponential moving average of the gradient (moment 1)
|
||||
state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_() # the update of the adam optimizer is also based on an exponential moving average of the squared of the gradient (moment 2)
|
||||
|
||||
# Sharing the memory
|
||||
def share_memory(self):
|
||||
for group in self.param_groups:
|
||||
for p in group['params']:
|
||||
state = self.state[p]
|
||||
state['step'].share_memory_() # tensor.share_memory_() acts a little bit like tensor.cuda()
|
||||
state['exp_avg'].share_memory_() # tensor.share_memory_() acts a little bit like tensor.cuda()
|
||||
state['exp_avg_sq'].share_memory_() # tensor.share_memory_() acts a little bit like tensor.cuda()
|
||||
|
||||
# Performing a single optimization step of the Adam algorithm (see algorithm 1 in https://arxiv.org/pdf/1412.6980.pdf)
|
||||
def step(self):
|
||||
loss = None
|
||||
for group in self.param_groups:
|
||||
for p in group['params']:
|
||||
if p.grad is None:
|
||||
continue
|
||||
grad = p.grad.data
|
||||
state = self.state[p]
|
||||
exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
|
||||
beta1, beta2 = group['betas']
|
||||
state['step'] += 1
|
||||
if group['weight_decay'] != 0:
|
||||
grad = grad.add(group['weight_decay'], p.data)
|
||||
exp_avg.mul_(beta1).add_(1 - beta1, grad)
|
||||
exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
|
||||
denom = exp_avg_sq.sqrt().add_(group['eps'])
|
||||
bias_correction1 = 1 - beta1 ** state['step'][0]
|
||||
bias_correction2 = 1 - beta2 ** state['step'][0]
|
||||
step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
|
||||
p.data.addcdiv_(-step_size, exp_avg, denom)
|
||||
return loss
|
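Per parameter, step() above performs the standard Adam update with the bias correction folded into the step size. A scalar transcription of one update, for illustration only (a hypothetical helper, not part of the course files):

import math

def adam_scalar_step(p, grad, exp_avg, exp_avg_sq, step, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    # one Adam update on a single float parameter, mirroring SharedAdam.step()
    step += 1
    exp_avg = beta1 * exp_avg + (1 - beta1) * grad                # moment 1
    exp_avg_sq = beta2 * exp_avg_sq + (1 - beta2) * grad * grad   # moment 2
    denom = math.sqrt(exp_avg_sq) + eps
    step_size = lr * math.sqrt(1 - beta2 ** step) / (1 - beta1 ** step)
    return p - step_size * exp_avg / denom, exp_avg, exp_avg_sq, step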
@ -0,0 +1,46 @@
|
||||
# Test Agent
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from envs import create_atari_env
|
||||
from model import ActorCritic
|
||||
from torch.autograd import Variable
|
||||
import time
|
||||
from collections import deque
|
||||
|
||||
# Making the test agent (it doesn't update the shared model, it only evaluates it by playing greedily)
|
||||
def test(rank, params, shared_model):
|
||||
torch.manual_seed(params.seed + rank) # asynchronizing the test agent
|
||||
env = create_atari_env(params.env_name, video=True) # running an environment with a video
|
||||
env.seed(params.seed + rank) # asynchronizing the environment
|
||||
model = ActorCritic(env.observation_space.shape[0], env.action_space) # creating one model
|
||||
model.eval() # putting the model in "eval" mode because it won't be trained
|
||||
state = env.reset() # getting the input images as numpy arrays
|
||||
state = torch.from_numpy(state) # converting them into torch tensors
|
||||
reward_sum = 0 # initializing the sum of rewards to 0
|
||||
done = True # initializing done to True
|
||||
start_time = time.time() # getting the starting time to measure the computation time
|
||||
actions = deque(maxlen=100) # cf https://pymotw.com/2/collections/deque.html
|
||||
episode_length = 0 # initializing the episode length to 0
|
||||
while True: # repeat
|
||||
episode_length += 1 # incrementing the episode length by one
|
||||
if done: # synchronizing with the shared model (same as train.py)
|
||||
model.load_state_dict(shared_model.state_dict())
|
||||
cx = Variable(torch.zeros(1, 256), volatile=True)
|
||||
hx = Variable(torch.zeros(1, 256), volatile=True)
|
||||
else:
|
||||
cx = Variable(cx.data, volatile=True)
|
||||
hx = Variable(hx.data, volatile=True)
|
||||
value, action_value, (hx, cx) = model((Variable(state.unsqueeze(0), volatile=True), (hx, cx)))
|
||||
prob = F.softmax(action_value)
|
||||
action = prob.max(1)[1].data.numpy() # the test agent does not explore, it directly plays the best action
|
||||
state, reward, done, _ = env.step(action[0]) # done = done or episode_length >= params.max_episode_length
|
||||
reward_sum += reward
|
||||
if done: # printing the results at the end of each part
|
||||
print("Time {}, episode reward {}, episode length {}".format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)), reward_sum, episode_length))
|
||||
reward_sum = 0 # reinitializing the sum of rewards
|
||||
episode_length = 0 # reinitializing the episode length
|
||||
actions.clear() # reinitializing the actions
|
||||
state = env.reset() # reinitializing the environment
|
||||
time.sleep(60) # taking a one-minute break to let the other agents practice (if the game is done)
|
||||
state = torch.from_numpy(state) # new state and we continue
|
BIN
Intelligence Artificielle de A a Z/3. Breakout/Code_With_Comments/test/.DS_Store
vendored
Normal file
BIN
Intelligence Artificielle de A a Z/3. Breakout/Code_With_Comments/test/.DS_Store
vendored
Normal file
Binary file not shown.
@ -0,0 +1,78 @@
|
||||
# Training the AI
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from envs import create_atari_env
|
||||
from model import ActorCritic
|
||||
from torch.autograd import Variable
|
||||
|
||||
# Implementing a function to make sure the models share the same gradient
|
||||
def ensure_shared_grads(model, shared_model):
|
||||
for param, shared_param in zip(model.parameters(), shared_model.parameters()):
|
||||
if shared_param.grad is not None:
|
||||
return
|
||||
shared_param._grad = param.grad
|
||||
|
||||
def train(rank, params, shared_model, optimizer):
|
||||
torch.manual_seed(params.seed + rank) # shifting the seed with rank to asynchronize each training agent
|
||||
env = create_atari_env(params.env_name) # creating an optimized environment thanks to the create_atari_env function
|
||||
env.seed(params.seed + rank) # aligning the seed of the environment on the seed of the agent
|
||||
model = ActorCritic(env.observation_space.shape[0], env.action_space) # creating the model from the ActorCritic class
|
||||
state = env.reset() # state is a numpy array of size 1*42*42, in black & white
|
||||
state = torch.from_numpy(state) # converting the numpy array into a torch tensor
|
||||
done = True # when the game is done
|
||||
episode_length = 0 # initializing the length of an episode to 0
|
||||
while True: # repeat
|
||||
episode_length += 1 # incrementing the episode length by one
|
||||
model.load_state_dict(shared_model.state_dict()) # synchronizing with the shared model - the agent gets the shared model to do an exploration on num_steps
|
||||
if done: # if it is the first iteration of the while loop or if the game was just done, then:
|
||||
cx = Variable(torch.zeros(1, 256)) # the cell states of the LSTM are reinitialized to zero
|
||||
hx = Variable(torch.zeros(1, 256)) # the hidden states of the LSTM are reinitialized to zero
|
||||
else: # else:
|
||||
cx = Variable(cx.data) # we keep the old cell states, making sure they are in a torch variable
|
||||
hx = Variable(hx.data) # we keep the old hidden states, making sure they are in a torch variable
|
||||
values = [] # initializing the list of values (V(S))
|
||||
log_probs = [] # initializing the list of log probabilities
|
||||
rewards = [] # initializing the list of rewards
|
||||
entropies = [] # initializing the list of entropies
|
||||
for step in range(params.num_steps): # going through the num_steps exploration steps
|
||||
value, action_values, (hx, cx) = model((Variable(state.unsqueeze(0)), (hx, cx))) # getting from the model the output V(S) of the critic, the output Q(S,A) of the actor, and the new hidden & cell states
|
||||
prob = F.softmax(action_values) # generating a distribution of probabilities of the Q-values according to the softmax: prob(a) = exp(prob(a))/sum_b(exp(prob(b)))
|
||||
log_prob = F.log_softmax(action_values) # generating a distribution of log probabilities of the Q-values according to the log softmax: log_prob(a) = log(prob(a))
|
||||
entropy = -(log_prob * prob).sum(1) # H(p) = - sum_x p(x).log(p(x))
|
||||
entropies.append(entropy) # storing the computed entropy
|
||||
action = prob.multinomial().data # selecting an action by taking a random draw from the prob distribution
|
||||
log_prob = log_prob.gather(1, Variable(action)) # getting the log prob associated to this selected action
|
||||
values.append(value) # storing the value V(S) of the state
|
||||
log_probs.append(log_prob) # storing the log prob of the action
|
||||
state, reward, done, _ = env.step(action.numpy()) # playing the selected action, reaching the new state, and getting the new reward
|
||||
done = (done or episode_length >= params.max_episode_length) # if the episode lasts too long (the agent is stuck), then it is done
|
||||
reward = max(min(reward, 1), -1) # clamping the reward between -1 and +1
|
||||
if done: # if the episode is done:
|
||||
episode_length = 0 # we reset the episode length
|
||||
state = env.reset() # we restart the environment
|
||||
state = torch.from_numpy(state) # tensorizing the new state
|
||||
rewards.append(reward) # storing the new observed reward
|
||||
if done: # if we are done
|
||||
break # we stop the exploration and we directly move on to the next step: the update of the shared model
|
||||
R = torch.zeros(1, 1) # intializing the cumulative reward
|
||||
if not done: # if we are not done:
|
||||
value, _, _ = model((Variable(state.unsqueeze(0)), (hx, cx))) # we get the value of the last reached state from the critic
|
||||
R = value.data # we initialize the cumulative reward with the value of the last reached state
|
||||
values.append(Variable(R)) # storing the value V(S) of the last reached state S
|
||||
policy_loss = 0 # initializing the policy loss
|
||||
value_loss = 0 # initializing the value loss
|
||||
R = Variable(R) # making sure the cumulative reward R is a torch Variable
|
||||
gae = torch.zeros(1, 1) # initializing the Generalized Advantage Estimation to 0
|
||||
for i in reversed(range(len(rewards))): # starting from the last exploration step and going back in time
|
||||
R = params.gamma * R + rewards[i] # R = gamma*R + r_t = r_0 + gamma r_1 + gamma^2 * r_2 ... + gamma^(n-1)*r_(n-1) + gamma^nb_step * V(last_state)
|
||||
advantage = R - values[i] # R is an estimator of Q at time t = i so advantage_i = Q_i - V(state_i) = R - value[i]
|
||||
value_loss = value_loss + 0.5 * advantage.pow(2) # computing the value loss
|
||||
TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data # computing the temporal difference
|
||||
gae = gae * params.gamma * params.tau + TD # gae = sum_i (gamma*tau)^i * TD(i) with gae_i = gae_(i+1)*gamma*tau + (r_i + gamma*V(state_i+1) - V(state_i))
|
||||
policy_loss = policy_loss - log_probs[i] * Variable(gae) - 0.01 * entropies[i] # computing the policy loss
|
||||
optimizer.zero_grad() # initializing the optimizer
|
||||
(policy_loss + 0.5 * value_loss).backward() # backpropagating the sum of the two losses, with the value loss scaled by 0.5 (the usual A3C weighting) so that it does not dominate the smaller policy loss
|
||||
torch.nn.utils.clip_grad_norm(model.parameters(), 40) # clipping the norm of the gradients to at most 40 to prevent them from taking huge values and destabilizing the training
|
||||
ensure_shared_grads(model, shared_model) # making sure the model of the agent and the shared model share the same gradient
|
||||
optimizer.step() # running the optimization step
|
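To make the backward loop above concrete, here is a tiny standalone numerical sketch of the n-step return, the advantage and the generalized advantage estimate on a made-up 3-step rollout (plain floats, hypothetical values, not part of the course files):

gamma, tau = 0.99, 1.0
rewards = [0.0, 0.0, 1.0]        # made-up clipped rewards
values = [0.5, 0.6, 0.8, 0.7]    # V(s_0), V(s_1), V(s_2) plus the bootstrap value of the last state

R = values[-1]                   # bootstrapping from the last state, as in the code above
gae = 0.0
for i in reversed(range(len(rewards))):
    R = gamma * R + rewards[i]                              # n-step return
    advantage = R - values[i]                               # drives the value loss
    td = rewards[i] + gamma * values[i + 1] - values[i]     # temporal difference
    gae = gae * gamma * tau + td                            # generalized advantage estimate
    print(i, round(R, 3), round(advantage, 3), round(gae, 3))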
1
Intelligence Artificielle de A a Z/gym/.dockerignore
Normal file
1
Intelligence Artificielle de A a Z/gym/.dockerignore
Normal file
@ -0,0 +1 @@
|
||||
.tox
|
39
Intelligence Artificielle de A a Z/gym/.gitignore
vendored
Normal file
39
Intelligence Artificielle de A a Z/gym/.gitignore
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
*.swp
|
||||
*.pyc
|
||||
*.py~
|
||||
.DS_Store
|
||||
.cache
|
||||
.pytest_cache/
|
||||
|
||||
# Setuptools distribution and build folders.
|
||||
/dist/
|
||||
/build
|
||||
|
||||
# Virtualenv
|
||||
/env
|
||||
|
||||
# Python egg metadata, regenerated from source files by setuptools.
|
||||
/*.egg-info
|
||||
|
||||
*.sublime-project
|
||||
*.sublime-workspace
|
||||
|
||||
logs/
|
||||
|
||||
.ipynb_checkpoints
|
||||
ghostdriver.log
|
||||
|
||||
junk
|
||||
MUJOCO_LOG.txt
|
||||
|
||||
rllab_mujoco
|
||||
|
||||
tutorial/*.html
|
||||
|
||||
# IDE files
|
||||
.eggs
|
||||
.tox
|
||||
|
||||
# PyCharm project files
|
||||
.idea
|
||||
vizdoom.ini
|
21
Intelligence Artificielle de A a Z/gym/.travis.yml
Normal file
21
Intelligence Artificielle de A a Z/gym/.travis.yml
Normal file
@ -0,0 +1,21 @@
|
||||
sudo: required
|
||||
language: python
|
||||
services:
|
||||
- docker
|
||||
env:
|
||||
# - UBUNTU_VER=14.04 - problems with atari-py
|
||||
- UBUNTU_VER=16.04
|
||||
- UBUNTU_VER=18.04
|
||||
|
||||
install: "" # so travis doesn't do pip install requirements.txt
|
||||
script:
|
||||
- docker build -f test.dockerfile.${UBUNTU_VER} -t gym-test --build-arg MUJOCO_KEY=$MUJOCO_KEY .
|
||||
- docker run -e MUJOCO_KEY=$MUJOCO_KEY gym-test tox
|
||||
|
||||
deploy:
|
||||
provider: pypi
|
||||
username: $TWINE_USERNAME
|
||||
password: $TWINE_PASSWORD
|
||||
on:
|
||||
tags: true
|
||||
condition: $UBUNTU_VER = 16.04
|
13
Intelligence Artificielle de A a Z/gym/CODE_OF_CONDUCT.rst
Normal file
13
Intelligence Artificielle de A a Z/gym/CODE_OF_CONDUCT.rst
Normal file
@ -0,0 +1,13 @@
|
||||
OpenAI Gym is dedicated to providing a harassment-free experience for
|
||||
everyone, regardless of gender, gender identity and expression, sexual
|
||||
orientation, disability, physical appearance, body size, age, race, or
|
||||
religion. We do not tolerate harassment of participants in any form.
|
||||
|
||||
This code of conduct applies to all OpenAI Gym spaces (including Gist
|
||||
comments) both online and off. Anyone who violates this code of
|
||||
conduct may be sanctioned or expelled from these spaces at the
|
||||
discretion of the OpenAI team.
|
||||
|
||||
We may add additional rules over time, which will be made clearly
|
||||
available to participants. Participants are responsible for knowing
|
||||
and abiding by these rules.
|
36
Intelligence Artificielle de A a Z/gym/LICENSE.md
Normal file
36
Intelligence Artificielle de A a Z/gym/LICENSE.md
Normal file
@ -0,0 +1,36 @@
|
||||
# gym
|
||||
|
||||
The MIT License
|
||||
|
||||
Copyright (c) 2016 OpenAI (https://openai.com)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
||||
# Mujoco models
|
||||
This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license:
|
||||
```
|
||||
This file is part of MuJoCo.
|
||||
Copyright 2009-2015 Roboti LLC.
|
||||
Mujoco :: Advanced physics simulation engine
|
||||
Source : www.roboti.us
|
||||
Version : 1.31
|
||||
Released : 23Apr16
|
||||
Author :: Vikash Kumar
|
||||
Contacts : kumar@roboti.us
|
||||
```
|
24
Intelligence Artificielle de A a Z/gym/Makefile
Normal file
24
Intelligence Artificielle de A a Z/gym/Makefile
Normal file
@ -0,0 +1,24 @@
|
||||
.PHONY: install test
|
||||
|
||||
install:
|
||||
pip install -r requirements.txt
|
||||
|
||||
base:
|
||||
docker pull ubuntu:14.04
|
||||
docker tag ubuntu:14.04 quay.io/openai/gym:base
|
||||
docker push quay.io/openai/gym:base
|
||||
|
||||
test:
|
||||
docker build -f test.dockerfile -t quay.io/openai/gym:test .
|
||||
docker push quay.io/openai/gym:test
|
||||
|
||||
upload:
|
||||
rm -rf dist
|
||||
python setup.py sdist
|
||||
twine upload dist/*
|
||||
|
||||
docker-build:
|
||||
docker build -t quay.io/openai/gym .
|
||||
|
||||
docker-run:
|
||||
docker run -ti quay.io/openai/gym bash
|
352
Intelligence Artificielle de A a Z/gym/README.rst
Normal file
352
Intelligence Artificielle de A a Z/gym/README.rst
Normal file
@ -0,0 +1,352 @@
|
||||
**Status:** Maintenance (expect bug fixes and minor updates)
|
||||
|
||||
OpenAI Gym
|
||||
**********
|
||||
|
||||
**OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to a standardized set of environments.
|
||||
|
||||
.. image:: https://travis-ci.org/openai/gym.svg?branch=master
|
||||
:target: https://travis-ci.org/openai/gym
|
||||
|
||||
`See What's New section below <#what-s-new>`_
|
||||
|
||||
``gym`` makes no assumptions about the structure of your agent, and is compatible with any numerical computation library, such as TensorFlow or Theano. You can use it from Python code, and soon from other languages.
|
||||
|
||||
If you're not sure where to start, we recommend beginning with the
|
||||
`docs <https://gym.openai.com/docs>`_ on our site. See also the `FAQ <https://github.com/openai/gym/wiki/FAQ>`_.
|
||||
|
||||
A whitepaper for OpenAI Gym is available at http://arxiv.org/abs/1606.01540, and here's a BibTeX entry that you can use to cite it in a publication::
|
||||
|
||||
@misc{1606.01540,
|
||||
Author = {Greg Brockman and Vicki Cheung and Ludwig Pettersson and Jonas Schneider and John Schulman and Jie Tang and Wojciech Zaremba},
|
||||
Title = {OpenAI Gym},
|
||||
Year = {2016},
|
||||
Eprint = {arXiv:1606.01540},
|
||||
}
|
||||
|
||||
.. contents:: **Contents of this document**
|
||||
:depth: 2
|
||||
|
||||
Basics
|
||||
======
|
||||
|
||||
There are two basic concepts in reinforcement learning: the
|
||||
environment (namely, the outside world) and the agent (namely, the
|
||||
algorithm you are writing). The agent sends `actions` to the
|
||||
environment, and the environment replies with `observations` and
|
||||
`rewards` (that is, a score).
|
||||
|
||||
The core `gym` interface is `Env <https://github.com/openai/gym/blob/master/gym/core.py>`_, which is
|
||||
the unified environment interface. There is no interface for agents;
|
||||
that part is left to you. The following are the ``Env`` methods you
|
||||
should know:
|
||||
|
||||
- `reset(self)`: Reset the environment's state. Returns `observation`.
|
||||
- `step(self, action)`: Step the environment by one timestep. Returns `observation`, `reward`, `done`, `info`.
|
||||
- `render(self, mode='human', close=False)`: Render one frame of the environment. The default mode will do something human friendly, such as pop up a window. Passing the `close` flag signals the renderer to close any such windows.
|
||||
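For example, a minimal interaction loop built from these three methods looks roughly like this (a sketch using a random policy):

.. code:: python

    import gym
    env = gym.make('CartPole-v0')
    observation = env.reset()
    for _ in range(1000):
        env.render()
        action = env.action_space.sample()  # a random (but valid) action
        observation, reward, done, info = env.step(action)
        if done:
            observation = env.reset()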
|
||||
Installation
|
||||
============
|
||||
|
||||
You can perform a minimal install of ``gym`` with:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
git clone https://github.com/openai/gym.git
|
||||
cd gym
|
||||
pip install -e .
|
||||
|
||||
If you prefer, you can do a minimal install of the packaged version directly from PyPI:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
pip install gym
|
||||
|
||||
You'll be able to run a few environments right away:
|
||||
|
||||
- algorithmic
|
||||
- toy_text
|
||||
- classic_control (you'll need ``pyglet`` to render though)
|
||||
|
||||
We recommend playing with those environments at first, and then later
|
||||
installing the dependencies for the remaining environments.
|
||||
|
||||
Installing everything
|
||||
---------------------
|
||||
|
||||
To install the full set of environments, you'll need to have some system
|
||||
packages installed. We'll build out the list here over time; please let us know
|
||||
what you end up installing on your platform. Also, take a look at the docker files (test.dockerfile.xx.xx) to
|
||||
see the composition of our CI-tested images.
|
||||
|
||||
On OSX:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
brew install cmake boost boost-python sdl2 swig wget
|
||||
|
||||
On Ubuntu 14.04 (non-mujoco only):
|
||||
|
||||
.. code:: shell
|
||||
|
||||
apt-get install libjpeg-dev cmake swig python-pyglet python3-opengl libboost-all-dev \
|
||||
libsdl2-2.0.0 libsdl2-dev libglu1-mesa libglu1-mesa-dev libgles2-mesa-dev \
|
||||
freeglut3 xvfb libav-tools
|
||||
|
||||
|
||||
On Ubuntu 16.04:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
apt-get install -y python-pyglet python3-opengl zlib1g-dev libjpeg-dev patchelf \
|
||||
cmake swig libboost-all-dev libsdl2-dev libosmesa6-dev xvfb ffmpeg
|
||||
|
||||
On Ubuntu 18.04:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
apt install -y python3-dev zlib1g-dev libjpeg-dev cmake swig python-pyglet python3-opengl libboost-all-dev libsdl2-dev \
|
||||
libosmesa6-dev patchelf ffmpeg xvfb
|
||||
|
||||
|
||||
MuJoCo has a proprietary dependency we can't set up for you. Follow
|
||||
the
|
||||
`instructions <https://github.com/openai/mujoco-py#obtaining-the-binaries-and-license-key>`_
|
||||
in the ``mujoco-py`` package for help.
|
||||
|
||||
Once you're ready to install everything, run ``pip install -e '.[all]'`` (or ``pip install 'gym[all]'``).
|
||||
|
||||
Supported systems
|
||||
-----------------
|
||||
|
||||
We currently support Linux and OS X running Python 2.7 or 3.5. Some users on OSX + Python3 may need to run
|
||||
|
||||
.. code:: shell
|
||||
|
||||
brew install boost-python --with-python3
|
||||
|
||||
If you want to access Gym from languages other than python, we have limited support for non-python
|
||||
frameworks, such as lua/Torch, using the OpenAI Gym `HTTP API <https://github.com/openai/gym-http-api>`_.
|
||||
|
||||
Pip version
|
||||
-----------
|
||||
|
||||
To run ``pip install -e '.[all]'``, you'll need a semi-recent pip.
|
||||
Please make sure your pip is at least at version ``1.5.0``. You can
|
||||
upgrade using the following: ``pip install --ignore-installed
|
||||
pip``. Alternatively, you can open `setup.py
|
||||
<https://github.com/openai/gym/blob/master/setup.py>`_ and
|
||||
install the dependencies by hand.
|
||||
|
||||
Rendering on a server
|
||||
---------------------
|
||||
|
||||
If you're trying to render video on a server, you'll need to connect a
|
||||
fake display. The easiest way to do this is by running under
|
||||
``xvfb-run`` (on Ubuntu, install the ``xvfb`` package):
|
||||
|
||||
.. code:: shell
|
||||
|
||||
xvfb-run -s "-screen 0 1400x900x24" bash
|
||||
|
||||
Installing dependencies for specific environments
|
||||
-------------------------------------------------
|
||||
|
||||
If you'd like to install the dependencies for only specific
|
||||
environments, see `setup.py
|
||||
<https://github.com/openai/gym/blob/master/setup.py>`_. We
|
||||
maintain the lists of dependencies on a per-environment group basis.
|
||||
|
||||
Environments
|
||||
============
|
||||
|
||||
The code for each environment group is housed in its own subdirectory
|
||||
`gym/envs
|
||||
<https://github.com/openai/gym/blob/master/gym/envs>`_. The
|
||||
specification of each task is in `gym/envs/__init__.py
|
||||
<https://github.com/openai/gym/blob/master/gym/envs/__init__.py>`_. It's
|
||||
worth browsing through both.
|
||||
|
||||
Algorithmic
|
||||
-----------
|
||||
|
||||
These are a variety of algorithmic tasks, such as learning to copy a
|
||||
sequence.
|
||||
|
||||
.. code:: python
|
||||
|
||||
import gym
|
||||
env = gym.make('Copy-v0')
|
||||
env.reset()
|
||||
env.render()
|
||||
|
||||
Atari
|
||||
-----
|
||||
|
||||
The Atari environments are a variety of Atari video games. If you didn't do the full install, you can install dependencies via ``pip install -e '.[atari]'`` (you'll need ``cmake`` installed) and then get started as follows:
|
||||
|
||||
.. code:: python
|
||||
|
||||
import gym
|
||||
env = gym.make('SpaceInvaders-v0')
|
||||
env.reset()
|
||||
env.render()
|
||||
|
||||
This will install ``atari-py``, which automatically compiles the `Arcade Learning Environment <http://www.arcadelearningenvironment.org/>`_. This can take quite a while (a few minutes on a decent laptop), so just be prepared.
|
||||
|
||||
Box2d
|
||||
-----------
|
||||
|
||||
Box2d is a 2D physics engine. You can install it via ``pip install -e '.[box2d]'`` and then get started as follows:
|
||||
|
||||
.. code:: python
|
||||
|
||||
import gym
|
||||
env = gym.make('LunarLander-v2')
|
||||
env.reset()
|
||||
env.render()
|
||||
|
||||
Classic control
|
||||
---------------
|
||||
|
||||
These are a variety of classic control tasks, which would appear in a typical reinforcement learning textbook. If you didn't do the full install, you will need to run ``pip install -e '.[classic_control]'`` to enable rendering. You can get started with them via:
|
||||
|
||||
.. code:: python
|
||||
|
||||
import gym
|
||||
env = gym.make('CartPole-v0')
|
||||
env.reset()
|
||||
env.render()
|
||||
|
||||
MuJoCo
|
||||
------
|
||||
|
||||
`MuJoCo <http://www.mujoco.org/>`_ is a physics engine which can do
|
||||
very detailed efficient simulations with contacts. It's not
|
||||
open-source, so you'll have to follow the instructions in `mujoco-py
|
||||
<https://github.com/openai/mujoco-py#obtaining-the-binaries-and-license-key>`_
|
||||
to set it up. You'll have to also run ``pip install -e '.[mujoco]'`` if you didn't do the full install.
|
||||
|
||||
.. code:: python
|
||||
|
||||
import gym
|
||||
env = gym.make('Humanoid-v2')
|
||||
env.reset()
|
||||
env.render()
|
||||
|
||||
|
||||
Robotics
|
||||
--------
|
||||
|
||||
`MuJoCo <http://www.mujoco.org/>`_ is a physics engine which can do
|
||||
very detailed efficient simulations with contacts and we use it for all robotics environments. It's not
|
||||
open-source, so you'll have to follow the instructions in `mujoco-py
|
||||
<https://github.com/openai/mujoco-py#obtaining-the-binaries-and-license-key>`_
|
||||
to set it up. You'll have to also run ``pip install -e '.[robotics]'`` if you didn't do the full install.
|
||||
|
||||
.. code:: python
|
||||
|
||||
import gym
|
||||
env = gym.make('HandManipulateBlock-v0')
|
||||
env.reset()
|
||||
env.render()
|
||||
|
||||
You can also find additional details in the accompanying `technical report <https://arxiv.org/abs/1802.09464>`_ and `blog post <https://blog.openai.com/ingredients-for-robotics-research/>`_.
|
||||
If you use these environments, you can cite them as follows::
|
||||
|
||||
@misc{1802.09464,
|
||||
Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba},
|
||||
Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research},
|
||||
Year = {2018},
|
||||
Eprint = {arXiv:1802.09464},
|
||||
}
|
||||
|
||||
Toy text
|
||||
--------
|
||||
|
||||
Toy environments which are text-based. There's no extra dependency to install, so to get started, you can just do:
|
||||
|
||||
.. code:: python
|
||||
|
||||
import gym
|
||||
env = gym.make('FrozenLake-v0')
|
||||
env.reset()
|
||||
env.render()
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
See the ``examples`` directory.
|
||||
|
||||
- Run `examples/agents/random_agent.py <https://github.com/openai/gym/blob/master/examples/agents/random_agent.py>`_ to run an simple random agent.
|
||||
- Run `examples/agents/cem.py <https://github.com/openai/gym/blob/master/examples/agents/cem.py>`_ to run an actual learning agent (using the cross-entropy method).
|
||||
- Run `examples/scripts/list_envs <https://github.com/openai/gym/blob/master/examples/scripts/list_envs>`_ to generate a list of all environments.
|
||||
|
||||
Testing
|
||||
=======
|
||||
|
||||
We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
pytest
|
||||
|
||||
|
||||
.. _See What's New section below:
|
||||
|
||||
What's new
|
||||
==========
|
||||
|
||||
- 2018-02-28: Release of a set of new robotics environments.
|
||||
- 2018-01-25: Made some aesthetic improvements and removed unmaintained parts of gym. This may seem like a downgrade in functionality, but it is actually a long-needed cleanup in preparation for some great new things that will be released in the next month.
|
||||
|
||||
+ Now your `Env` and `Wrapper` subclasses should define `step`, `reset`, `render`, `close`, `seed` rather than underscored method names.
|
||||
+ Removed the `board_game`, `debugging`, `safety`, `parameter_tuning` environments since they're not being maintained by us at OpenAI. We encourage authors and users to create new repositories for these environments.
|
||||
+ Changed `MultiDiscrete` action space to range from `[0, ..., n-1]` rather than `[a, ..., b-1]`.
|
||||
+ No more `render(close=True)`, use env-specific methods to close the rendering.
|
||||
+ Removed `scoreboard` directory, since site doesn't exist anymore.
|
||||
+ Moved `gym/monitoring` to `gym/wrappers/monitoring`
|
||||
+ Add `dtype` to `Space`.
|
||||
+ Not using python's built-in module anymore, using `gym.logger`
|
||||
|
||||
- 2018-01-24: All continuous control environments now use mujoco_py >= 1.50.
|
||||
Versions have been updated accordingly to -v2, e.g. HalfCheetah-v2. Performance
|
||||
should be similar (see https://github.com/openai/gym/pull/834) but there are likely
|
||||
some differences due to changes in MuJoCo.
|
||||
- 2017-06-16: Make env.spec into a property to fix a bug that occurs
|
||||
when you try to print out an unregistered Env.
|
||||
- 2017-05-13: BACKWARDS INCOMPATIBILITY: The Atari environments are now at
|
||||
*v4*. To keep using the old v3 environments, keep gym <= 0.8.2 and atari-py
|
||||
<= 0.0.21. Note that the v4 environments will not give identical results to
|
||||
existing v3 results, although differences are minor. The v4 environments
|
||||
incorporate the latest Arcade Learning Environment (ALE), including several
|
||||
ROM fixes, and now handle loading and saving of the emulator state. While
|
||||
seeds still ensure determinism, the effect of any given seed is not preserved
|
||||
across this upgrade because the random number generator in ALE has changed.
|
||||
The `*NoFrameSkip-v4` environments should be considered the canonical Atari
|
||||
environments from now on.
|
||||
- 2017-03-05: BACKWARDS INCOMPATIBILITY: The `configure` method has been removed
|
||||
from `Env`. `configure` was not used by `gym`, but was used by some dependent
|
||||
libraries including `universe`. These libraries will migrate away from the
|
||||
configure method by using wrappers instead. This change is on master and will be released with 0.8.0.
|
||||
- 2016-12-27: BACKWARDS INCOMPATIBILITY: The gym monitor is now a
|
||||
wrapper. Rather than starting monitoring as
|
||||
`env.monitor.start(directory)`, envs are now wrapped as follows:
|
||||
`env = wrappers.Monitor(env, directory)`. This change is on master
|
||||
and will be released with 0.7.0.
|
||||
- 2016-11-1: Several experimental changes to how a running monitor interacts
|
||||
with environments. The monitor will now raise an error if reset() is called
|
||||
when the env has not returned done=True. The monitor will only record complete
|
||||
episodes where done=True. Finally, the monitor no longer calls seed() on the
|
||||
underlying env, nor does it record or upload seed information.
|
||||
- 2016-10-31: We're experimentally expanding the environment ID format
|
||||
to include an optional username.
|
||||
- 2016-09-21: Switch the Gym automated logger setup to configure the
|
||||
root logger rather than just the 'gym' logger.
|
||||
- 2016-08-17: Calling `close` on an env will also close the monitor
|
||||
and any rendering windows.
|
||||
- 2016-08-17: The monitor will no longer write manifest files in
|
||||
real-time, unless `write_upon_reset=True` is passed.
|
||||
- 2016-05-28: For controlled reproducibility, envs now support seeding
|
||||
(cf #91 and #135). The monitor records which seeds are used. We will
|
||||
soon add seed information to the display on the scoreboard.
|
26
Intelligence Artificielle de A a Z/gym/bin/docker_entrypoint
Normal file
26
Intelligence Artificielle de A a Z/gym/bin/docker_entrypoint
Normal file
@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
# This script is the entrypoint for our Docker image.
|
||||
|
||||
set -ex
|
||||
|
||||
# Set up display; otherwise rendering will fail
|
||||
Xvfb -screen 0 1024x768x24 &
|
||||
export DISPLAY=:0
|
||||
|
||||
# Wait for the file to come up
|
||||
display=0
|
||||
file="/tmp/.X11-unix/X$display"
|
||||
for i in $(seq 1 10); do
|
||||
if [ -e "$file" ]; then
|
||||
break
|
||||
fi
|
||||
|
||||
echo "Waiting for $file to be created (try $i/10)"
|
||||
sleep "$i"
|
||||
done
|
||||
if ! [ -e "$file" ]; then
|
||||
echo "Timing out: $file was not created"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exec "$@"
|
21
Intelligence Artificielle de A a Z/gym/bin/render.py
Normal file
21
Intelligence Artificielle de A a Z/gym/bin/render.py
Normal file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import gym
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Renders a Gym environment for quick inspection.')
|
||||
parser.add_argument('env_id', type=str, help='the ID of the environment to be rendered (e.g. HalfCheetah-v1)')
|
||||
parser.add_argument('--step', type=int, default=1)
|
||||
args = parser.parse_args()
|
||||
|
||||
env = gym.make(args.env_id)
|
||||
env.reset()
|
||||
|
||||
step = 0
|
||||
while True:
|
||||
if args.step:
|
||||
env.step(env.action_space.sample())
|
||||
env.render()
|
||||
if step % 10 == 0:
|
||||
env.reset()
|
||||
step += 1
|
33
Intelligence Artificielle de A a Z/gym/docs/agents.md
Normal file
33
Intelligence Artificielle de A a Z/gym/docs/agents.md
Normal file
@ -0,0 +1,33 @@
|
||||
# Agents
|
||||
|
||||
An "agent" describes the method of running an RL algorithm against an environment in the gym. The agent may contain the algorithm itself or simply provide an integration between an algorithm and the gym environments.
|
||||
|
||||
## RandomAgent
|
||||
|
||||
A sample agent located in this repo at `gym/examples/agents/random_agent.py`. This simple agent leverages the environment's ability to produce a random valid action and does so for each step; a minimal version is sketched below.
|
||||
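The core of such an agent is only a few lines; a rough sketch (not the exact contents of that file):

```python
import gym

class RandomAgent(object):
    """An agent that ignores observations and samples a random valid action each step."""
    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, observation, reward, done):
        return self.action_space.sample()

env = gym.make('CartPole-v0')
agent = RandomAgent(env.action_space)
observation, reward, done = env.reset(), 0, False
for _ in range(200):
    action = agent.act(observation, reward, done)
    observation, reward, done, _ = env.step(action)
    if done:
        observation = env.reset()
```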
|
||||
## cem.py
|
||||
|
||||
A generic Cross-Entropy agent located in this repo at `gym/examples/agents/cem.py`. This agent defaults to 10 iterations of 25 episodes considering the top 20% "elite".
|
||||
|
||||
## dqn
|
||||
|
||||
This is a very basic DQN (with experience replay) implementation, which uses OpenAI's gym environment and Keras/Theano neural networks. [/sherjilozair/dqn](https://github.com/sherjilozair/dqn)
|
||||
|
||||
## Simple DQN
|
||||
|
||||
Simple, fast and easy to extend DQN implementation using [Neon](https://github.com/NervanaSystems/neon) deep learning library. Comes with out-of-box tools to train, test and visualize models. For details see [this blog post](https://www.nervanasys.com/deep-reinforcement-learning-with-neon/) or check out the [repo](https://github.com/tambetm/simple_dqn).
|
||||
|
||||
## AgentNet
|
||||
A library that allows you to develop custom deep/convolutional/recurrent reinforcement learning agent with full integration with Theano/Lasagne. Also contains a toolkit for various reinforcement learning algorithms, policies, memory augmentations, etc.
|
||||
|
||||
- The repo's here: [AgentNet](https://github.com/yandexdataschool/AgentNet)
|
||||
- [A step-by-step demo for Atari SpaceInvaders ](https://github.com/yandexdataschool/AgentNet/blob/master/examples/Playing%20Atari%20with%20Deep%20Reinforcement%20Learning%20%28OpenAI%20Gym%29.ipynb)
|
||||
|
||||
## rllab
|
||||
|
||||
a framework for developing and evaluating reinforcement learning algorithms, fully compatible with OpenAI Gym. It includes a wide range of continuous control tasks plus implementations of many algorithms. [/rllab/rllab](https://github.com/rllab/rllab)
|
||||
|
||||
## [keras-rl](https://github.com/matthiasplappert/keras-rl)
|
||||
|
||||
[keras-rl](https://github.com/matthiasplappert/keras-rl) implements some state-of-the art deep reinforcement learning algorithms. It was built with OpenAI Gym in mind, and also built on top of the deep learning library [Keras](https://keras.io/) and utilises similar design patterns like callbacks and user-definable metrics.
|
48
Intelligence Artificielle de A a Z/gym/docs/environments.md
Normal file
48
Intelligence Artificielle de A a Z/gym/docs/environments.md
Normal file
@ -0,0 +1,48 @@
|
||||
# Environments
|
||||
|
||||
The gym comes prepackaged with many, many environments. It's this common API around many environments that makes the gym so great. Here we will list additional environments that do not come prepackaged with the gym. Submit another to this list via a pull-request.
|
||||
|
||||
_**NOTICE**: It's possible that in time OpenAI will develop a full-fledged repository of supplemental environments. Until then this bit of markdown will suffice._
|
||||
|
||||
## PGE: Parallel Game Engine
|
||||
|
||||
PGE is a FOSS 3D engine for AI simulations, and can interoperate with the Gym. Contains environments with modern 3D graphics, and uses Bullet for physics.
|
||||
|
||||
Learn more here: https://github.com/222464/PGE
|
||||
|
||||
## gym-inventory: Inventory Control Environments
|
||||
|
||||
gym-inventory is a single agent domain featuring discrete state and action spaces that an AI agent might encounter in inventory control problems.
|
||||
|
||||
Learn more here: https://github.com/paulhendricks/gym-inventory
|
||||
|
||||
## gym-gazebo: training Robots in Gazebo
|
||||
|
||||
gym-gazebo presents an extension of the initial OpenAI gym for robotics using ROS and Gazebo, an advanced 3D modeling and
|
||||
rendering tool.
|
||||
|
||||
Learn more here: https://github.com/erlerobot/gym-gazebo/
|
||||
|
||||
## gym-maze: 2D maze environment
|
||||
A simple 2D maze environment where an agent finds its way from the start position to the goal.
|
||||
|
||||
Learn more here: https://github.com/tuzzer/gym-maze/
|
||||
|
||||
## gym-minigrid: Minimalistic Gridworld Environment
|
||||
|
||||
A minimalistic gridworld environment. Seeks to minimize software dependencies, be easy to extend and deliver good performance for faster training.
|
||||
|
||||
Learn more here: https://github.com/maximecb/gym-minigrid
|
||||
|
||||
## gym-sokoban: 2D Transportation Puzzles
|
||||
|
||||
The environment consists of transportation puzzles in which the player's goal is to push all boxes on the warehouse's storage locations.
|
||||
The advantage of the environment is that it generates a new random level every time it is initialized or reset, which prevents overfitting to predefined levels.
|
||||
|
||||
Learn more here: https://github.com/mpSchrader/gym-sokoban
|
||||
|
||||
## gym-duckietown: Lane-Following Simulator for Duckietown
|
||||
|
||||
A lane-following simulator built for the [Duckietown](http://duckietown.org/) project (small-scale self-driving car course).
|
||||
|
||||
Learn more here: https://github.com/duckietown/gym-duckietown
|
7
Intelligence Artificielle de A a Z/gym/docs/misc.md
Normal file
@ -0,0 +1,7 @@
|
||||
# Miscellaneous
|
||||
|
||||
Here we list tools, libraries, APIs, tutorials, and other resources provided by the community to add value to the gym ecosystem.
|
||||
|
||||
## OpenAIGym.jl
|
||||
|
||||
Convenience wrapper of the OpenAI Gym for the Julia language [/tbreloff/OpenAIGym.jl](https://github.com/tbreloff/OpenAIGym.jl)
|
7
Intelligence Artificielle de A a Z/gym/docs/readme.md
Normal file
@ -0,0 +1,7 @@
|
||||
# Table of Contents
|
||||
|
||||
- [Agents](agents.md) contains a listing of agents compatible with gym environments. Agents facilitate the running of an algorithm against an environment.
|
||||
|
||||
- [Environments](environments.md) lists more environments to run your algorithms against. These do not come prepackaged with the gym.
|
||||
|
||||
- [Miscellaneous](misc.md) is a collection of other value-add tools and utilities. These could be anything from a small convenience lib to a collection of video tutorials or a new language binding.
|
@ -0,0 +1,19 @@
|
||||
# Support code for cem.py
|
||||
|
||||
class BinaryActionLinearPolicy(object):
|
||||
def __init__(self, theta):
|
||||
self.w = theta[:-1]
|
||||
self.b = theta[-1]
|
||||
def act(self, ob):
|
||||
y = ob.dot(self.w) + self.b
|
||||
a = int(y < 0)
|
||||
return a
|
||||
|
||||
class ContinuousActionLinearPolicy(object):
|
||||
def __init__(self, theta, n_in, n_out):
|
||||
assert len(theta) == (n_in + 1) * n_out
|
||||
self.W = theta[0 : n_in * n_out].reshape(n_in, n_out)
|
||||
self.b = theta[n_in * n_out : None].reshape(1, n_out)
|
||||
def act(self, ob):
|
||||
a = ob.dot(self.W) + self.b
|
||||
return a
|
@ -0,0 +1,93 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import gym
|
||||
from gym import wrappers, logger
|
||||
import numpy as np
|
||||
from six.moves import cPickle as pickle
|
||||
import json, sys, os
|
||||
from os import path
|
||||
from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled
|
||||
import argparse
|
||||
|
||||
def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
|
||||
"""
|
||||
Generic implementation of the cross-entropy method for maximizing a black-box function
|
||||
|
||||
f: a function mapping from vector -> scalar
|
||||
th_mean: initial mean over input distribution
|
||||
batch_size: number of samples of theta to evaluate per batch
|
||||
n_iter: number of batches
|
||||
elite_frac: each batch, select this fraction of the top-performing samples
|
||||
initial_std: initial standard deviation over parameter vectors
|
||||
"""
|
||||
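    # In brief, each iteration below: (1) samples `batch_size` parameter vectors
    # theta ~ N(th_mean, diag(th_std**2)), (2) scores each theta with the black-box
    # function f, (3) keeps the top `elite_frac` fraction of thetas (the elites),
    # and (4) refits th_mean / th_std to the elites before the next batch.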
n_elite = int(np.round(batch_size*elite_frac))
|
||||
th_std = np.ones_like(th_mean) * initial_std
|
||||
|
||||
for _ in range(n_iter):
|
||||
ths = np.array([th_mean + dth for dth in th_std[None,:]*np.random.randn(batch_size, th_mean.size)])
|
||||
ys = np.array([f(th) for th in ths])
|
||||
elite_inds = ys.argsort()[::-1][:n_elite]
|
||||
elite_ths = ths[elite_inds]
|
||||
th_mean = elite_ths.mean(axis=0)
|
||||
th_std = elite_ths.std(axis=0)
|
||||
yield {'ys' : ys, 'theta_mean' : th_mean, 'y_mean' : ys.mean()}
|
||||
|
||||
def do_rollout(agent, env, num_steps, render=False):
|
||||
total_rew = 0
|
||||
ob = env.reset()
|
||||
for t in range(num_steps):
|
||||
a = agent.act(ob)
|
||||
(ob, reward, done, _info) = env.step(a)
|
||||
total_rew += reward
|
||||
if render and t%3==0: env.render()
|
||||
if done: break
|
||||
return total_rew, t+1
|
||||
|
||||
if __name__ == '__main__':
|
||||
logger.set_level(logger.INFO)
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--display', action='store_true')
|
||||
parser.add_argument('target', nargs="?", default="CartPole-v0")
|
||||
args = parser.parse_args()
|
||||
|
||||
env = gym.make(args.target)
|
||||
env.seed(0)
|
||||
np.random.seed(0)
|
||||
params = dict(n_iter=10, batch_size=25, elite_frac = 0.2)
|
||||
num_steps = 200
|
||||
|
||||
# You provide the directory to write to (can be an existing
|
||||
# directory, but can't contain previous monitor results. You can
|
||||
# also dump to a tempdir if you'd like: tempfile.mkdtemp().
|
||||
outdir = '/tmp/cem-agent-results'
|
||||
env = wrappers.Monitor(env, outdir, force=True)
|
||||
|
||||
# Prepare snapshotting
|
||||
# ----------------------------------------
|
||||
def writefile(fname, s):
|
||||
with open(path.join(outdir, fname), 'w') as fh: fh.write(s)
|
||||
info = {}
|
||||
info['params'] = params
|
||||
info['argv'] = sys.argv
|
||||
info['env_id'] = env.spec.id
|
||||
# ------------------------------------------
|
||||
|
||||
def noisy_evaluation(theta):
|
||||
agent = BinaryActionLinearPolicy(theta)
|
||||
rew, T = do_rollout(agent, env, num_steps)
|
||||
return rew
|
||||
|
||||
# Train the agent, and snapshot each stage
|
||||
for (i, iterdata) in enumerate(
|
||||
cem(noisy_evaluation, np.zeros(env.observation_space.shape[0]+1), **params)):
|
||||
print('Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean']))
|
||||
agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
|
||||
if args.display: do_rollout(agent, env, 200, render=True)
|
||||
writefile('agent-%.4i.pkl'%i, str(pickle.dumps(agent, -1)))
|
||||
|
||||
# Write out the env at the end so we store the parameters of this
|
||||
# environment.
|
||||
writefile('info.json', json.dumps(info))
|
||||
|
||||
env.close()
|
@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import print_function
|
||||
|
||||
import sys, gym, time
|
||||
|
||||
#
|
||||
# Test yourself as a learning agent! Pass environment name as a command-line argument, for example:
|
||||
#
|
||||
# python keyboard_agent.py SpaceInvadersNoFrameskip-v4
|
||||
#
|
||||
|
||||
env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
|
||||
|
||||
if not hasattr(env.action_space, 'n'):
|
||||
raise Exception('Keyboard agent only supports discrete action spaces')
|
||||
ACTIONS = env.action_space.n
|
||||
SKIP_CONTROL = 0    # Repeat the previous control decision SKIP_CONTROL times in a row;
|
||||
# this lets you test how large a frame skip is still playable.
|
||||
|
||||
human_agent_action = 0
|
||||
human_wants_restart = False
|
||||
human_sets_pause = False
|
||||
|
||||
def key_press(key, mod):
|
||||
global human_agent_action, human_wants_restart, human_sets_pause
|
||||
if key==0xff0d: human_wants_restart = True
|
||||
if key==32: human_sets_pause = not human_sets_pause
|
||||
a = int( key - ord('0') )
|
||||
if a <= 0 or a >= ACTIONS: return
|
||||
human_agent_action = a
|
||||
|
||||
def key_release(key, mod):
|
||||
global human_agent_action
|
||||
a = int( key - ord('0') )
|
||||
if a <= 0 or a >= ACTIONS: return
|
||||
if human_agent_action == a:
|
||||
human_agent_action = 0
|
||||
|
||||
env.render()
|
||||
env.unwrapped.viewer.window.on_key_press = key_press
|
||||
env.unwrapped.viewer.window.on_key_release = key_release
|
||||
|
||||
def rollout(env):
|
||||
global human_agent_action, human_wants_restart, human_sets_pause
|
||||
human_wants_restart = False
|
||||
obser = env.reset()
|
||||
skip = 0
|
||||
total_reward = 0
|
||||
total_timesteps = 0
|
||||
while 1:
|
||||
if not skip:
|
||||
#print("taking action {}".format(human_agent_action))
|
||||
a = human_agent_action
|
||||
total_timesteps += 1
|
||||
skip = SKIP_CONTROL
|
||||
else:
|
||||
skip -= 1
|
||||
|
||||
obser, r, done, info = env.step(a)
|
||||
if r != 0:
|
||||
print("reward %0.3f" % r)
|
||||
total_reward += r
|
||||
window_still_open = env.render()
|
||||
if window_still_open==False: return False
|
||||
if done: break
|
||||
if human_wants_restart: break
|
||||
while human_sets_pause:
|
||||
env.render()
|
||||
time.sleep(0.1)
|
||||
time.sleep(0.1)
|
||||
print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
|
||||
|
||||
print("ACTIONS={}".format(ACTIONS))
|
||||
print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
|
||||
print("No keys pressed is taking action 0")
|
||||
|
||||
while 1:
|
||||
window_still_open = rollout(env)
|
||||
if window_still_open==False: break
|
||||
|
@ -0,0 +1,51 @@
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
import gym
|
||||
from gym import wrappers, logger
|
||||
|
||||
class RandomAgent(object):
|
||||
"""The world's simplest agent!"""
|
||||
def __init__(self, action_space):
|
||||
self.action_space = action_space
|
||||
|
||||
def act(self, observation, reward, done):
|
||||
return self.action_space.sample()
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description=None)
|
||||
parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run')
|
||||
args = parser.parse_args()
|
||||
|
||||
# You can set the level to logger.DEBUG or logger.WARN if you
|
||||
# want to change the amount of output.
|
||||
logger.set_level(logger.INFO)
|
||||
|
||||
env = gym.make(args.env_id)
|
||||
|
||||
# You provide the directory to write to (can be an existing
|
||||
# directory, including one with existing data -- all monitor files
|
||||
# will be namespaced). You can also dump to a tempdir if you'd
|
||||
# like: tempfile.mkdtemp().
|
||||
outdir = '/tmp/random-agent-results'
|
||||
env = wrappers.Monitor(env, directory=outdir, force=True)
|
||||
env.seed(0)
|
||||
agent = RandomAgent(env.action_space)
|
||||
|
||||
episode_count = 100
|
||||
reward = 0
|
||||
done = False
|
||||
|
||||
for i in range(episode_count):
|
||||
ob = env.reset()
|
||||
while True:
|
||||
action = agent.act(ob, reward, done)
|
||||
ob, reward, done, _ = env.step(action)
|
||||
if done:
|
||||
break
|
||||
# Note there's no env.render() here. But the environment still can open window and
|
||||
# render if asked by env.monitor: it calls env.render('rgb_array') to record video.
|
||||
# Video is not recorded every episode, see capped_cubic_video_schedule for details.
|
||||
|
||||
# Close the env and write monitor result info to disk
|
||||
env.close()
|
@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Run all the tasks on a benchmark using a random agent.
|
||||
#
|
||||
# This script assumes you have set an OPENAI_GYM_API_KEY environment
|
||||
# variable. You can find your API key in the web interface:
|
||||
# https://gym.openai.com/settings/profile.
|
||||
#
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import gym
|
||||
# In modules, use `logger = logging.getLogger(__name__)`
|
||||
from gym import wrappers
|
||||
from gym.scoreboard.scoring import benchmark_score_from_local
|
||||
|
||||
import openai_benchmark
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description=None)
|
||||
parser.add_argument('-b', '--benchmark-id', help='id of benchmark to run e.g. Atari7Ram-v0')
|
||||
parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
|
||||
parser.add_argument('-f', '--force', action='store_true', dest='force', default=False)
|
||||
parser.add_argument('-t', '--training-dir', default="/tmp/gym-results", help='What directory to upload.')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbosity == 0:
|
||||
logger.setLevel(logging.INFO)
|
||||
elif args.verbosity >= 1:
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
benchmark_id = args.benchmark_id
|
||||
if benchmark_id is None:
|
||||
logger.info("Must supply a valid benchmark")
|
||||
return 1
|
||||
|
||||
try:
|
||||
benchmark = gym.benchmark_spec(benchmark_id)
|
||||
except Exception:
|
||||
logger.info("Invalid benchmark")
|
||||
return 1
|
||||
|
||||
# run benchmark tasks
|
||||
for task in benchmark.tasks:
|
||||
logger.info("Running on env: {}".format(task.env_id))
|
||||
for trial in range(task.trials):
|
||||
env = gym.make(task.env_id)
|
||||
training_dir_name = "{}/{}-{}".format(args.training_dir, task.env_id, trial)
|
||||
env = wrappers.Monitor(env, training_dir_name, video_callable=False, force=args.force)
|
||||
env.reset()
|
||||
for _ in range(task.max_timesteps):
|
||||
o, r, done, _ = env.step(env.action_space.sample())
|
||||
if done:
|
||||
env.reset()
|
||||
env.close()
|
||||
|
||||
logger.info("""Computing statistics for this benchmark run...
|
||||
{{
|
||||
score: {score},
|
||||
num_envs_solved: {num_envs_solved},
|
||||
summed_training_seconds: {summed_training_seconds},
|
||||
start_to_finish_seconds: {start_to_finish_seconds},
|
||||
}}
|
||||
|
||||
""".rstrip().format(**benchmark_score_from_local(benchmark_id, args.training_dir)))
|
||||
|
||||
logger.info("""Done running, upload results using the following command:
|
||||
|
||||
python -c "import gym; gym.upload('{}', benchmark_id='{}', algorithm_id='(unknown)')"
|
||||
|
||||
""".rstrip().format(args.training_dir, benchmark_id))
|
||||
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from gym import envs
|
||||
envids = [spec.id for spec in envs.registry.all()]
|
||||
for envid in sorted(envids):
|
||||
print(envid)
|
@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python
|
||||
import gym
|
||||
from gym import spaces, envs
|
||||
import argparse
|
||||
import numpy as np
|
||||
import itertools
|
||||
import time
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("env")
|
||||
parser.add_argument("--mode", choices=["noop", "random", "static", "human"],
|
||||
default="random")
|
||||
parser.add_argument("--max_steps", type=int, default=0)
|
||||
parser.add_argument("--fps",type=float)
|
||||
parser.add_argument("--once", action="store_true")
|
||||
parser.add_argument("--ignore_done", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
env = envs.make(args.env)
|
||||
ac_space = env.action_space
|
||||
|
||||
fps = args.fps or env.metadata.get('video.frames_per_second') or 100
|
||||
if args.max_steps == 0: args.max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
|
||||
|
||||
while True:
|
||||
env.reset()
|
||||
env.render(mode='human')
|
||||
print("Starting a new trajectory")
|
||||
for t in range(args.max_steps) if args.max_steps else itertools.count():
|
||||
done = False
|
||||
if args.mode == "noop":
|
||||
if isinstance(ac_space, spaces.Box):
|
||||
a = np.zeros(ac_space.shape)
|
||||
elif isinstance(ac_space, spaces.Discrete):
|
||||
a = 0
|
||||
else:
|
||||
raise NotImplementedError("noop not implemented for class {}".format(type(ac_space)))
|
||||
_, _, done, _ = env.step(a)
|
||||
time.sleep(1.0/fps)
|
||||
elif args.mode == "random":
|
||||
a = ac_space.sample()
|
||||
_, _, done, _ = env.step(a)
|
||||
time.sleep(1.0/fps)
|
||||
elif args.mode == "static":
|
||||
time.sleep(1.0/fps)
|
||||
elif args.mode == "human":
|
||||
a = raw_input("type action from {0,...,%i} and press enter: "%(ac_space.n-1))
|
||||
try:
|
||||
a = int(a)
|
||||
except ValueError:
|
||||
print("WARNING: ignoring illegal action '{}'.".format(a))
|
||||
a = 0
|
||||
if a >= ac_space.n:
|
||||
print("WARNING: ignoring illegal action {}.".format(a))
|
||||
a = 0
|
||||
_, _, done, _ = env.step(a)
|
||||
|
||||
env.render()
|
||||
if done and not args.ignore_done: break
|
||||
print("Done after {} steps".format(t+1))
|
||||
if args.once:
|
||||
break
|
||||
else:
|
||||
raw_input("Press enter to continue")
|
14
Intelligence Artificielle de A a Z/gym/gym/__init__.py
Normal file
@ -0,0 +1,14 @@
|
||||
import distutils.version
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
from gym import error
|
||||
from gym.utils import reraise
|
||||
from gym.version import VERSION as __version__
|
||||
|
||||
from gym.core import Env, GoalEnv, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
|
||||
from gym.envs import make, spec
|
||||
from gym import logger
|
||||
|
||||
__all__ = ["Env", "Space", "Wrapper", "make", "spec"]
|
343
Intelligence Artificielle de A a Z/gym/gym/core.py
Normal file
@ -0,0 +1,343 @@
|
||||
from gym import logger
|
||||
|
||||
import gym
|
||||
from gym import error
|
||||
from gym.utils import closer
|
||||
|
||||
env_closer = closer.Closer()
|
||||
|
||||
# Env-related abstractions
|
||||
|
||||
class Env(object):
|
||||
"""The main OpenAI Gym class. It encapsulates an environment with
|
||||
arbitrary behind-the-scenes dynamics. An environment can be
|
||||
partially or fully observed.
|
||||
|
||||
The main API methods that users of this class need to know are:
|
||||
|
||||
step
|
||||
reset
|
||||
render
|
||||
close
|
||||
seed
|
||||
|
||||
And set the following attributes:
|
||||
|
||||
action_space: The Space object corresponding to valid actions
|
||||
observation_space: The Space object corresponding to valid observations
|
||||
reward_range: A tuple corresponding to the min and max possible rewards
|
||||
|
||||
Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.
|
||||
|
||||
The methods are accessed publicly as "step", "reset", etc. The
|
||||
non-underscored versions are wrapper methods to which we may add
|
||||
functionality over time.
|
||||
"""
|
||||
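# Illustrative usage only (not part of this class): a typical interaction loop
# with any Env subclass looks like
#
#   env = gym.make('CartPole-v0')
#   observation = env.reset()
#   done = False
#   while not done:
#       observation, reward, done, info = env.step(env.action_space.sample())
#   env.close()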
|
||||
# Set this in SOME subclasses
|
||||
metadata = {'render.modes': []}
|
||||
reward_range = (-float('inf'), float('inf'))
|
||||
spec = None
|
||||
|
||||
# Set these in ALL subclasses
|
||||
action_space = None
|
||||
observation_space = None
|
||||
|
||||
def step(self, action):
|
||||
"""Run one timestep of the environment's dynamics. When end of
|
||||
episode is reached, you are responsible for calling `reset()`
|
||||
to reset this environment's state.
|
||||
|
||||
Accepts an action and returns a tuple (observation, reward, done, info).
|
||||
|
||||
Args:
|
||||
action (object): an action provided by the agent
|
||||
|
||||
Returns:
|
||||
observation (object): agent's observation of the current environment
|
||||
reward (float) : amount of reward returned after previous action
|
||||
done (boolean): whether the episode has ended, in which case further step() calls will return undefined results
|
||||
info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def reset(self):
|
||||
"""Resets the state of the environment and returns an initial observation.
|
||||
|
||||
Returns: observation (object): the initial observation of the
|
||||
space.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def render(self, mode='human'):
|
||||
"""Renders the environment.
|
||||
|
||||
The set of supported modes varies per environment. (And some
|
||||
environments do not support rendering at all.) By convention,
|
||||
if mode is:
|
||||
|
||||
- human: render to the current display or terminal and
|
||||
return nothing. Usually for human consumption.
|
||||
- rgb_array: Return a numpy.ndarray with shape (x, y, 3),
|
||||
representing RGB values for an x-by-y pixel image, suitable
|
||||
for turning into a video.
|
||||
- ansi: Return a string (str) or StringIO.StringIO containing a
|
||||
terminal-style text representation. The text can include newlines
|
||||
and ANSI escape sequences (e.g. for colors).
|
||||
|
||||
Note:
|
||||
Make sure that your class's metadata 'render.modes' key includes
|
||||
the list of supported modes. It's recommended to call super()
|
||||
in implementations to use the functionality of this method.
|
||||
|
||||
Args:
|
||||
mode (str): the mode to render with
|
||||
close (bool): close all open renderings
|
||||
|
||||
Example:
|
||||
|
||||
class MyEnv(Env):
|
||||
metadata = {'render.modes': ['human', 'rgb_array']}
|
||||
|
||||
def render(self, mode='human'):
|
||||
if mode == 'rgb_array':
|
||||
return np.array(...) # return RGB frame suitable for video
|
||||
elif mode == 'human':
|
||||
... # pop up a window and render
|
||||
else:
|
||||
super(MyEnv, self).render(mode=mode) # just raise an exception
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def close(self):
|
||||
"""Override _close in your subclass to perform any necessary cleanup.
|
||||
|
||||
Environments will automatically close() themselves when
|
||||
garbage collected or when the program exits.
|
||||
"""
|
||||
return
|
||||
|
||||
def seed(self, seed=None):
|
||||
"""Sets the seed for this env's random number generator(s).
|
||||
|
||||
Note:
|
||||
Some environments use multiple pseudorandom number generators.
|
||||
We want to capture all such seeds used in order to ensure that
|
||||
there aren't accidental correlations between multiple generators.
|
||||
|
||||
Returns:
|
||||
list<bigint>: Returns the list of seeds used in this env's random
|
||||
number generators. The first value in the list should be the
|
||||
"main" seed, or the value which a reproducer should pass to
|
||||
'seed'. Often, the main seed equals the provided 'seed', but
|
||||
this won't be true if seed=None, for example.
|
||||
"""
|
||||
logger.warn("Could not seed environment %s", self)
|
||||
return
|
||||
|
||||
@property
|
||||
def unwrapped(self):
|
||||
"""Completely unwrap this env.
|
||||
|
||||
Returns:
|
||||
gym.Env: The base non-wrapped gym.Env instance
|
||||
"""
|
||||
return self
|
||||
|
||||
def __str__(self):
|
||||
if self.spec is None:
|
||||
return '<{} instance>'.format(type(self).__name__)
|
||||
else:
|
||||
return '<{}<{}>>'.format(type(self).__name__, self.spec.id)
|
||||
|
||||
|
||||
class GoalEnv(Env):
|
||||
"""A goal-based environment. It functions just as any regular OpenAI Gym environment but it
|
||||
imposes a required structure on the observation_space. More concretely, the observation
|
||||
space is required to contain at least three elements, namely `observation`, `desired_goal`, and
|
||||
`achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve.
|
||||
`achieved_goal` is the goal that the agent has currently achieved instead. `observation` contains the
|
||||
actual observations of the environment as per usual.
|
||||
"""
|
||||
|
||||
def reset(self):
|
||||
# Enforce that each GoalEnv uses a Goal-compatible observation space.
|
||||
if not isinstance(self.observation_space, gym.spaces.Dict):
|
||||
raise error.Error('GoalEnv requires an observation space of type gym.spaces.Dict')
|
||||
result = super(GoalEnv, self).reset()
|
||||
for key in ['observation', 'achieved_goal', 'desired_goal']:
|
||||
if key not in result:
|
||||
raise error.Error('GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(key))
|
||||
return result
|
||||
|
||||
def compute_reward(self, achieved_goal, desired_goal, info):
|
||||
"""Compute the step reward. This externalizes the reward function and makes
|
||||
it dependent on a desired goal and the one that was achieved. If you wish to include
|
||||
additional rewards that are independent of the goal, you can include the necessary values
|
||||
to derive it in info and compute it accordingly.
|
||||
|
||||
Args:
|
||||
achieved_goal (object): the goal that was achieved during execution
|
||||
desired_goal (object): the desired goal that we asked the agent to attempt to achieve
|
||||
info (dict): an info dictionary with additional information
|
||||
|
||||
Returns:
|
||||
float: The reward that corresponds to the provided achieved goal w.r.t. the desired
|
||||
goal. Note that the following should always hold true:
|
||||
|
||||
ob, reward, done, info = env.step()
|
||||
assert reward == env.compute_reward(ob['achieved_goal'], ob['desired_goal'], info)
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
# Space-related abstractions
|
||||
|
||||
class Space(object):
|
||||
"""Defines the observation and action spaces, so you can write generic
|
||||
code that applies to any Env. For example, you can choose a random
|
||||
action.
|
||||
"""
|
||||
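# Illustrative usage only (not part of this class): given any env,
#   env.action_space.sample()      # draw a random valid action
#   env.action_space.contains(a)   # membership test (also `a in env.action_space`)
#   env.observation_space.shape    # shape of observations, when defined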
def __init__(self, shape=None, dtype=None):
|
||||
import numpy as np # takes about 300-400ms to import, so we load lazily
|
||||
self.shape = None if shape is None else tuple(shape)
|
||||
self.dtype = None if dtype is None else np.dtype(dtype)
|
||||
|
||||
def sample(self):
|
||||
"""
|
||||
Uniformly randomly sample a random element of this space
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def contains(self, x):
|
||||
"""
|
||||
Return boolean specifying if x is a valid
|
||||
member of this space
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
__contains__ = contains
|
||||
|
||||
def to_jsonable(self, sample_n):
|
||||
"""Convert a batch of samples from this space to a JSONable data type."""
|
||||
# By default, assume identity is JSONable
|
||||
return sample_n
|
||||
|
||||
def from_jsonable(self, sample_n):
|
||||
"""Convert a JSONable data type to a batch of samples from this space."""
|
||||
# By default, assume identity is JSONable
|
||||
return sample_n
|
||||
|
||||
|
||||
warn_once = True
|
||||
|
||||
def deprecated_warn_once(text):
|
||||
global warn_once
|
||||
if not warn_once: return
|
||||
warn_once = False
|
||||
logger.warn(text)
|
||||
|
||||
|
||||
class Wrapper(Env):
|
||||
env = None
|
||||
|
||||
def __init__(self, env):
|
||||
self.env = env
|
||||
self.action_space = self.env.action_space
|
||||
self.observation_space = self.env.observation_space
|
||||
self.reward_range = self.env.reward_range
|
||||
self.metadata = self.env.metadata
|
||||
|
||||
@classmethod
|
||||
def class_name(cls):
|
||||
return cls.__name__
|
||||
|
||||
def step(self, action):
|
||||
if hasattr(self, "_step"):
|
||||
deprecated_warn_once("%s doesn't implement 'step' method, but it implements deprecated '_step' method." % type(self))
|
||||
self.step = self._step
|
||||
return self.step(action)
|
||||
else:
|
||||
deprecated_warn_once("%s doesn't implement 'step' method, " % type(self) +
|
||||
"which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
|
||||
return self.env.step(action)
|
||||
|
||||
def reset(self, **kwargs):
|
||||
if hasattr(self, "_reset"):
|
||||
deprecated_warn_once("%s doesn't implement 'reset' method, but it implements deprecated '_reset' method." % type(self))
|
||||
self.reset = self._reset
|
||||
return self._reset(**kwargs)
|
||||
else:
|
||||
deprecated_warn_once("%s doesn't implement 'reset' method, " % type(self) +
|
||||
"which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
|
||||
return self.env.reset(**kwargs)
|
||||
|
||||
def render(self, mode='human', **kwargs):
|
||||
return self.env.render(mode, **kwargs)
|
||||
|
||||
def close(self):
|
||||
if self.env:
|
||||
return self.env.close()
|
||||
|
||||
def seed(self, seed=None):
|
||||
return self.env.seed(seed)
|
||||
|
||||
def compute_reward(self, achieved_goal, desired_goal, info):
|
||||
return self.env.compute_reward(achieved_goal, desired_goal, info)
|
||||
|
||||
def __str__(self):
|
||||
return '<{}{}>'.format(type(self).__name__, self.env)
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
@property
|
||||
def unwrapped(self):
|
||||
return self.env.unwrapped
|
||||
|
||||
@property
|
||||
def spec(self):
|
||||
return self.env.spec
|
||||
|
||||
|
||||
class ObservationWrapper(Wrapper):
|
||||
def step(self, action):
|
||||
observation, reward, done, info = self.env.step(action)
|
||||
return self.observation(observation), reward, done, info
|
||||
|
||||
def reset(self, **kwargs):
|
||||
observation = self.env.reset(**kwargs)
|
||||
return self.observation(observation)
|
||||
|
||||
def observation(self, observation):
|
||||
deprecated_warn_once("%s doesn't implement 'observation' method. Maybe it implements deprecated '_observation' method." % type(self))
|
||||
return self._observation(observation)
|
||||
|
||||
|
||||
class RewardWrapper(Wrapper):
|
||||
def reset(self):
|
||||
return self.env.reset()
|
||||
|
||||
def step(self, action):
|
||||
observation, reward, done, info = self.env.step(action)
|
||||
return observation, self.reward(reward), done, info
|
||||
|
||||
def reward(self, reward):
|
||||
deprecated_warn_once("%s doesn't implement 'reward' method. Maybe it implements deprecated '_reward' method." % type(self))
|
||||
return self._reward(reward)
|
||||
|
||||
|
||||
class ActionWrapper(Wrapper):
|
||||
def step(self, action):
|
||||
action = self.action(action)
|
||||
return self.env.step(action)
|
||||
|
||||
def reset(self):
|
||||
return self.env.reset()
|
||||
|
||||
def action(self, action):
|
||||
deprecated_warn_once("%s doesn't implement 'action' method. Maybe it implements deprecated '_action' method." % type(self))
|
||||
return self._action(action)
|
||||
|
||||
def reverse_action(self, action):
|
||||
deprecated_warn_once("%s doesn't implement 'reverse_action' method. Maybe it implements deprecated '_reverse_action' method." % type(self))
|
||||
return self._reverse_action(action)
|
113
Intelligence Artificielle de A a Z/gym/gym/envs/README.md
Normal file
@ -0,0 +1,113 @@
|
||||
# Envs
|
||||
|
||||
These are the core integrated environments. Note that we may later
|
||||
restructure any of the files, but will keep the environments available
|
||||
at the relevant package's top-level. So for example, you should access
|
||||
`AntEnv` as follows:
|
||||
|
||||
```
|
||||
# Will be supported in future releases
|
||||
from gym.envs import mujoco
|
||||
mujoco.AntEnv
|
||||
```
|
||||
|
||||
Rather than:
|
||||
|
||||
```
|
||||
# May break in future releases
|
||||
from gym.envs.mujoco import ant
|
||||
ant.AntEnv
|
||||
```
|
||||
|
||||
## How to create new environments for Gym
|
||||
|
||||
* Create a new repo called gym-foo, which should also be a PIP package.
|
||||
|
||||
* A good example is https://github.com/openai/gym-soccer.
|
||||
|
||||
* It should have at least the following files:
|
||||
```sh
|
||||
gym-foo/
|
||||
README.md
|
||||
setup.py
|
||||
gym_foo/
|
||||
__init__.py
|
||||
envs/
|
||||
__init__.py
|
||||
foo_env.py
|
||||
foo_extrahard_env.py
|
||||
```
|
||||
|
||||
* `gym-foo/setup.py` should have:
|
||||
|
||||
```python
|
||||
from setuptools import setup
|
||||
|
||||
setup(name='gym_foo',
|
||||
version='0.0.1',
|
||||
install_requires=['gym'] # And any other dependencies foo needs
|
||||
)
|
||||
```
|
||||
|
||||
* `gym-foo/gym_foo/__init__.py` should have:
|
||||
```python
|
||||
from gym.envs.registration import register
|
||||
|
||||
register(
|
||||
id='foo-v0',
|
||||
entry_point='gym_foo.envs:FooEnv',
|
||||
)
|
||||
register(
|
||||
id='foo-extrahard-v0',
|
||||
entry_point='gym_foo.envs:FooExtraHardEnv',
|
||||
)
|
||||
```
|
||||
|
||||
* `gym-foo/gym_foo/envs/__init__.py` should have:
|
||||
```python
|
||||
from gym_foo.envs.foo_env import FooEnv
|
||||
from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv
|
||||
```
|
||||
|
||||
* `gym-foo/gym_foo/envs/foo_env.py` should look something like:
|
||||
```python
|
||||
import gym
|
||||
from gym import error, spaces, utils
|
||||
from gym.utils import seeding
|
||||
|
||||
class FooEnv(gym.Env):
|
||||
metadata = {'render.modes': ['human']}
|
||||
|
||||
def __init__(self):
|
||||
...
|
||||
def step(self, action):
|
||||
...
|
||||
def reset(self):
|
||||
...
|
||||
def render(self, mode='human', close=False):
|
||||
...
|
||||
```
|
||||
|
||||
## How to add new environments to Gym, within this repo (not recommended for new environments)
|
||||
|
||||
1. Write your environment in an existing collection or a new collection. All collections are subfolders of `/gym/envs`.
|
||||
2. Import your environment into the `__init__.py` file of the collection. This file will be located at `/gym/envs/my_collection/__init__.py`. Add `from gym.envs.my_collection.my_awesome_env import MyEnv` to this file.
|
||||
3. Register your env in `/gym/envs/__init__.py`:
|
||||
|
||||
```
|
||||
register(
|
||||
id='MyEnv-v0',
|
||||
entry_point='gym.envs.my_collection:MyEnv',
|
||||
)
|
||||
```
|
||||
|
||||
4. Add your environment to the scoreboard in `/gym/scoreboard/__init__.py`:
|
||||
|
||||
```
|
||||
add_task(
|
||||
id='MyEnv-v0',
|
||||
summary="Super cool environment",
|
||||
group='my_collection',
|
||||
contributor='mygithubhandle',
|
||||
)
|
||||
```
|
533
Intelligence Artificielle de A a Z/gym/gym/envs/__init__.py
Normal file
@ -0,0 +1,533 @@
|
||||
from gym.envs.registration import registry, register, make, spec
|
||||
|
||||
# Algorithmic
|
||||
# ----------------------------------------
|
||||
|
||||
register(
|
||||
id='Copy-v0',
|
||||
entry_point='gym.envs.algorithmic:CopyEnv',
|
||||
max_episode_steps=200,
|
||||
reward_threshold=25.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='RepeatCopy-v0',
|
||||
entry_point='gym.envs.algorithmic:RepeatCopyEnv',
|
||||
max_episode_steps=200,
|
||||
reward_threshold=75.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='ReversedAddition-v0',
|
||||
entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
|
||||
kwargs={'rows' : 2},
|
||||
max_episode_steps=200,
|
||||
reward_threshold=25.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='ReversedAddition3-v0',
|
||||
entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
|
||||
kwargs={'rows' : 3},
|
||||
max_episode_steps=200,
|
||||
reward_threshold=25.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='DuplicatedInput-v0',
|
||||
entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
|
||||
max_episode_steps=200,
|
||||
reward_threshold=9.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Reverse-v0',
|
||||
entry_point='gym.envs.algorithmic:ReverseEnv',
|
||||
max_episode_steps=200,
|
||||
reward_threshold=25.0,
|
||||
)
|
||||
|
||||
# Classic
|
||||
# ----------------------------------------
|
||||
|
||||
register(
|
||||
id='CartPole-v0',
|
||||
entry_point='gym.envs.classic_control:CartPoleEnv',
|
||||
max_episode_steps=200,
|
||||
reward_threshold=195.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='CartPole-v1',
|
||||
entry_point='gym.envs.classic_control:CartPoleEnv',
|
||||
max_episode_steps=500,
|
||||
reward_threshold=475.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='MountainCar-v0',
|
||||
entry_point='gym.envs.classic_control:MountainCarEnv',
|
||||
max_episode_steps=200,
|
||||
reward_threshold=-110.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='MountainCarContinuous-v0',
|
||||
entry_point='gym.envs.classic_control:Continuous_MountainCarEnv',
|
||||
max_episode_steps=999,
|
||||
reward_threshold=90.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Pendulum-v0',
|
||||
entry_point='gym.envs.classic_control:PendulumEnv',
|
||||
max_episode_steps=200,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Acrobot-v1',
|
||||
entry_point='gym.envs.classic_control:AcrobotEnv',
|
||||
max_episode_steps=500,
|
||||
)
|
||||
|
||||
# Box2d
|
||||
# ----------------------------------------
|
||||
|
||||
register(
|
||||
id='LunarLander-v2',
|
||||
entry_point='gym.envs.box2d:LunarLander',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=200,
|
||||
)
|
||||
|
||||
register(
|
||||
id='LunarLanderContinuous-v2',
|
||||
entry_point='gym.envs.box2d:LunarLanderContinuous',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=200,
|
||||
)
|
||||
|
||||
register(
|
||||
id='BipedalWalker-v2',
|
||||
entry_point='gym.envs.box2d:BipedalWalker',
|
||||
max_episode_steps=1600,
|
||||
reward_threshold=300,
|
||||
)
|
||||
|
||||
register(
|
||||
id='BipedalWalkerHardcore-v2',
|
||||
entry_point='gym.envs.box2d:BipedalWalkerHardcore',
|
||||
max_episode_steps=2000,
|
||||
reward_threshold=300,
|
||||
)
|
||||
|
||||
register(
|
||||
id='CarRacing-v0',
|
||||
entry_point='gym.envs.box2d:CarRacing',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=900,
|
||||
)
|
||||
|
||||
# Toy Text
|
||||
# ----------------------------------------
|
||||
|
||||
register(
|
||||
id='Blackjack-v0',
|
||||
entry_point='gym.envs.toy_text:BlackjackEnv',
|
||||
)
|
||||
|
||||
register(
|
||||
id='KellyCoinflip-v0',
|
||||
entry_point='gym.envs.toy_text:KellyCoinflipEnv',
|
||||
reward_threshold=246.61,
|
||||
)
|
||||
register(
|
||||
id='KellyCoinflipGeneralized-v0',
|
||||
entry_point='gym.envs.toy_text:KellyCoinflipGeneralizedEnv',
|
||||
)
|
||||
|
||||
register(
|
||||
id='FrozenLake-v0',
|
||||
entry_point='gym.envs.toy_text:FrozenLakeEnv',
|
||||
kwargs={'map_name' : '4x4'},
|
||||
max_episode_steps=100,
|
||||
reward_threshold=0.78, # optimum = .8196
|
||||
)
|
||||
|
||||
register(
|
||||
id='FrozenLake8x8-v0',
|
||||
entry_point='gym.envs.toy_text:FrozenLakeEnv',
|
||||
kwargs={'map_name' : '8x8'},
|
||||
max_episode_steps=200,
|
||||
reward_threshold=0.99, # optimum = 1
|
||||
)
|
||||
|
||||
register(
|
||||
id='CliffWalking-v0',
|
||||
entry_point='gym.envs.toy_text:CliffWalkingEnv',
|
||||
)
|
||||
|
||||
register(
|
||||
id='NChain-v0',
|
||||
entry_point='gym.envs.toy_text:NChainEnv',
|
||||
max_episode_steps=1000,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Roulette-v0',
|
||||
entry_point='gym.envs.toy_text:RouletteEnv',
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Taxi-v2',
|
||||
entry_point='gym.envs.toy_text.taxi:TaxiEnv',
|
||||
reward_threshold=8, # optimum = 8.46
|
||||
max_episode_steps=200,
|
||||
)
|
||||
|
||||
register(
|
||||
id='GuessingGame-v0',
|
||||
entry_point='gym.envs.toy_text.guessing_game:GuessingGame',
|
||||
max_episode_steps=200,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HotterColder-v0',
|
||||
entry_point='gym.envs.toy_text.hotter_colder:HotterColder',
|
||||
max_episode_steps=200,
|
||||
)
|
||||
|
||||
# Mujoco
|
||||
# ----------------------------------------
|
||||
|
||||
# 2D
|
||||
|
||||
register(
|
||||
id='Reacher-v2',
|
||||
entry_point='gym.envs.mujoco:ReacherEnv',
|
||||
max_episode_steps=50,
|
||||
reward_threshold=-3.75,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Pusher-v2',
|
||||
entry_point='gym.envs.mujoco:PusherEnv',
|
||||
max_episode_steps=100,
|
||||
reward_threshold=0.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Thrower-v2',
|
||||
entry_point='gym.envs.mujoco:ThrowerEnv',
|
||||
max_episode_steps=100,
|
||||
reward_threshold=0.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Striker-v2',
|
||||
entry_point='gym.envs.mujoco:StrikerEnv',
|
||||
max_episode_steps=100,
|
||||
reward_threshold=0.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='InvertedPendulum-v2',
|
||||
entry_point='gym.envs.mujoco:InvertedPendulumEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=950.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='InvertedDoublePendulum-v2',
|
||||
entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=9100.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HalfCheetah-v2',
|
||||
entry_point='gym.envs.mujoco:HalfCheetahEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=4800.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Hopper-v2',
|
||||
entry_point='gym.envs.mujoco:HopperEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=3800.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Swimmer-v2',
|
||||
entry_point='gym.envs.mujoco:SwimmerEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=360.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Walker2d-v2',
|
||||
max_episode_steps=1000,
|
||||
entry_point='gym.envs.mujoco:Walker2dEnv',
|
||||
)
|
||||
|
||||
register(
|
||||
id='Ant-v2',
|
||||
entry_point='gym.envs.mujoco:AntEnv',
|
||||
max_episode_steps=1000,
|
||||
reward_threshold=6000.0,
|
||||
)
|
||||
|
||||
register(
|
||||
id='Humanoid-v2',
|
||||
entry_point='gym.envs.mujoco:HumanoidEnv',
|
||||
max_episode_steps=1000,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HumanoidStandup-v2',
|
||||
entry_point='gym.envs.mujoco:HumanoidStandupEnv',
|
||||
max_episode_steps=1000,
|
||||
)
|
||||
|
||||
# Robotics
|
||||
# ----------------------------------------
|
||||
|
||||
def _merge(a, b):
|
||||
a.update(b)
|
||||
return a
|
||||
|
||||
for reward_type in ['sparse', 'dense']:
|
||||
suffix = 'Dense' if reward_type == 'dense' else ''
|
||||
kwargs = {
|
||||
'reward_type': reward_type,
|
||||
}
|
||||
|
||||
# Fetch
|
||||
register(
|
||||
id='FetchSlide{}-v1'.format(suffix),
|
||||
entry_point='gym.envs.robotics:FetchSlideEnv',
|
||||
kwargs=kwargs,
|
||||
max_episode_steps=50,
|
||||
)
|
||||
|
||||
register(
|
||||
id='FetchPickAndPlace{}-v1'.format(suffix),
|
||||
entry_point='gym.envs.robotics:FetchPickAndPlaceEnv',
|
||||
kwargs=kwargs,
|
||||
max_episode_steps=50,
|
||||
)
|
||||
|
||||
register(
|
||||
id='FetchReach{}-v1'.format(suffix),
|
||||
entry_point='gym.envs.robotics:FetchReachEnv',
|
||||
kwargs=kwargs,
|
||||
max_episode_steps=50,
|
||||
)
|
||||
|
||||
register(
|
||||
id='FetchPush{}-v1'.format(suffix),
|
||||
entry_point='gym.envs.robotics:FetchPushEnv',
|
||||
kwargs=kwargs,
|
||||
max_episode_steps=50,
|
||||
)
|
||||
|
||||
# Hand
|
||||
register(
|
||||
id='HandReach{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandReachEnv',
|
||||
kwargs=kwargs,
|
||||
max_episode_steps=50,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HandManipulateBlockRotateZ{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandBlockEnv',
|
||||
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'z'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HandManipulateBlockRotateParallel{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandBlockEnv',
|
||||
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'parallel'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HandManipulateBlockRotateXYZ{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandBlockEnv',
|
||||
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HandManipulateBlockFull{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandBlockEnv',
|
||||
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
# Alias for "Full"
|
||||
register(
|
||||
id='HandManipulateBlock{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandBlockEnv',
|
||||
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HandManipulateEggRotate{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandEggEnv',
|
||||
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HandManipulateEggFull{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandEggEnv',
|
||||
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
# Alias for "Full"
|
||||
register(
|
||||
id='HandManipulateEgg{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandEggEnv',
|
||||
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HandManipulatePenRotate{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandPenEnv',
|
||||
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HandManipulatePenFull{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandPenEnv',
|
||||
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
# Alias for "Full"
|
||||
register(
|
||||
id='HandManipulatePen{}-v0'.format(suffix),
|
||||
entry_point='gym.envs.robotics:HandPenEnv',
|
||||
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
|
||||
max_episode_steps=100,
|
||||
)
|
||||
|
||||
# Atari
|
||||
# ----------------------------------------
|
||||
|
||||
# # print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
|
||||
for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
|
||||
'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
|
||||
'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
|
||||
'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
|
||||
'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
|
||||
'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
|
||||
'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
|
||||
'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
|
||||
'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']:
|
||||
for obs_type in ['image', 'ram']:
|
||||
# space_invaders should yield SpaceInvaders-v0 and SpaceInvaders-ram-v0
|
||||
name = ''.join([g.capitalize() for g in game.split('_')])
|
||||
if obs_type == 'ram':
|
||||
name = '{}-ram'.format(name)
|
||||
|
||||
nondeterministic = False
|
||||
if game == 'elevator_action' and obs_type == 'ram':
|
||||
# ElevatorAction-ram-v0 seems to yield slightly
|
||||
# non-deterministic observations about 10% of the time. We
|
||||
# should track this down eventually, but for now we just
|
||||
# mark it as nondeterministic.
|
||||
nondeterministic = True
|
||||
|
||||
register(
|
||||
id='{}-v0'.format(name),
|
||||
entry_point='gym.envs.atari:AtariEnv',
|
||||
kwargs={'game': game, 'obs_type': obs_type, 'repeat_action_probability': 0.25},
|
||||
max_episode_steps=10000,
|
||||
nondeterministic=nondeterministic,
|
||||
)
|
||||
|
||||
register(
|
||||
id='{}-v4'.format(name),
|
||||
entry_point='gym.envs.atari:AtariEnv',
|
||||
kwargs={'game': game, 'obs_type': obs_type},
|
||||
max_episode_steps=100000,
|
||||
nondeterministic=nondeterministic,
|
||||
)
|
||||
|
||||
# Standard Deterministic (as in the original DeepMind paper)
|
||||
if game == 'space_invaders':
|
||||
frameskip = 3
|
||||
else:
|
||||
frameskip = 4
|
||||
|
||||
# Use a deterministic frame skip.
|
||||
register(
|
||||
id='{}Deterministic-v0'.format(name),
|
||||
entry_point='gym.envs.atari:AtariEnv',
|
||||
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip, 'repeat_action_probability': 0.25},
|
||||
max_episode_steps=100000,
|
||||
nondeterministic=nondeterministic,
|
||||
)
|
||||
|
||||
register(
|
||||
id='{}Deterministic-v4'.format(name),
|
||||
entry_point='gym.envs.atari:AtariEnv',
|
||||
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip},
|
||||
max_episode_steps=100000,
|
||||
nondeterministic=nondeterministic,
|
||||
)
|
||||
|
||||
register(
|
||||
id='{}NoFrameskip-v0'.format(name),
|
||||
entry_point='gym.envs.atari:AtariEnv',
|
||||
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1, 'repeat_action_probability': 0.25}, # A frameskip of 1 means we get every frame
|
||||
max_episode_steps=frameskip * 100000,
|
||||
nondeterministic=nondeterministic,
|
||||
)
|
||||
|
||||
# No frameskip. (Atari has no entropy source, so these are
|
||||
# deterministic environments.)
|
||||
register(
|
||||
id='{}NoFrameskip-v4'.format(name),
|
||||
entry_point='gym.envs.atari:AtariEnv',
|
||||
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1}, # A frameskip of 1 means we get every frame
|
||||
max_episode_steps=frameskip * 100000,
|
||||
nondeterministic=nondeterministic,
|
||||
)
|
||||
|
||||
|
||||
# Unit test
|
||||
# ---------
|
||||
|
||||
register(
|
||||
id='CubeCrash-v0',
|
||||
entry_point='gym.envs.unittest:CubeCrash',
|
||||
reward_threshold=0.9,
|
||||
)
|
||||
register(
|
||||
id='CubeCrashSparse-v0',
|
||||
entry_point='gym.envs.unittest:CubeCrashSparse',
|
||||
reward_threshold=0.9,
|
||||
)
|
||||
register(
|
||||
id='CubeCrashScreenBecomesBlack-v0',
|
||||
entry_point='gym.envs.unittest:CubeCrashScreenBecomesBlack',
|
||||
reward_threshold=0.9,
|
||||
)
|
||||
|
||||
register(
|
||||
id='MemorizeDigits-v0',
|
||||
entry_point='gym.envs.unittest:MemorizeDigits',
|
||||
reward_threshold=20,
|
||||
)
|
||||
|
@ -0,0 +1,5 @@
|
||||
from gym.envs.algorithmic.copy_ import CopyEnv
|
||||
from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv
|
||||
from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv
|
||||
from gym.envs.algorithmic.reverse import ReverseEnv
|
||||
from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv
|
@ -0,0 +1,326 @@
|
||||
"""
|
||||
Algorithmic environments have the following traits in common:
|
||||
|
||||
- A 1-d "input tape" or 2-d "input grid" of characters
|
||||
- A target string which is a deterministic function of the input characters
|
||||
|
||||
Agents control a read head that moves over the input tape. Observations consist
|
||||
of the single character currently under the read head. The read head may fall
|
||||
off the end of the tape in any direction. When this happens, agents will observe
|
||||
a special blank character (with index=env.base) until they get back in bounds.
|
||||
|
||||
Actions consist of 3 sub-actions:
|
||||
- Direction to move the read head (left or right, plus up and down for 2-d envs)
|
||||
- Whether to write to the output tape
|
||||
- Which character to write (ignored if the above sub-action is 0)
|
||||
|
||||
An episode ends when:
|
||||
- The agent writes the full target string to the output tape.
|
||||
- The agent writes an incorrect character.
|
||||
- The agent runs out the time limit. (Which is fairly conservative.)
|
||||
|
||||
Reward schedule:
|
||||
write a correct character: +1
|
||||
write a wrong character: -.5
|
||||
run out the clock: -1
|
||||
otherwise: 0
|
||||
|
||||
In the beginning, input strings will be fairly short. After an environment has
|
||||
been consistently solved over some window of episodes, the environment will
|
||||
increase the average length of generated strings. Typical env specs require
|
||||
leveling up many times to reach their reward threshold.
|
||||
"""
|
||||
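# Illustrative usage only (comments, not executed). Assuming gym is installed and
# the algorithmic environments are registered (e.g. Copy-v0):
#
#   import gym
#   env = gym.make('Copy-v0')
#   obs = env.reset()            # obs = index of the character under the read head
#   # An action is a tuple: (move direction, write flag, character to write).
#   obs, reward, done, info = env.step((1, 1, obs))  # move right, copy what we read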
from gym import Env, logger
|
||||
from gym.spaces import Discrete, Tuple
|
||||
from gym.utils import colorize, seeding
|
||||
import numpy as np
|
||||
from six import StringIO
|
||||
import sys
|
||||
import math
|
||||
|
||||
class AlgorithmicEnv(Env):
|
||||
|
||||
metadata = {'render.modes': ['human', 'ansi']}
|
||||
# Only 'promote' the length of generated input strings if the worst of the
|
||||
# last n episodes was no more than this far from the maximum reward
|
||||
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -1.0
|
||||
|
||||
def __init__(self, base=10, chars=False, starting_min_length=2):
|
||||
"""
|
||||
base: Number of distinct characters.
|
||||
chars: If True, use uppercase alphabet. Otherwise, digits. Only affects
|
||||
rendering.
|
||||
starting_min_length: Minimum input string length. Ramps up as episodes
|
||||
are consistently solved.
|
||||
"""
|
||||
self.base = base
|
||||
# Keep track of this many past episodes
|
||||
self.last = 10
|
||||
# Cumulative reward earned this episode
|
||||
self.episode_total_reward = None
|
||||
# Running tally of reward shortfalls. e.g. if there were 10 points to earn and
|
||||
# we got 8, we'd append -2
|
||||
AlgorithmicEnv.reward_shortfalls = []
|
||||
if chars:
|
||||
self.charmap = [chr(ord('A')+i) for i in range(base)]
|
||||
else:
|
||||
self.charmap = [str(i) for i in range(base)]
|
||||
self.charmap.append(' ')
|
||||
# TODO: Not clear why this is a class variable rather than instance.
|
||||
# Could lead to some spooky action at a distance if someone is working
|
||||
# with multiple algorithmic envs at once. Also makes testing tricky.
|
||||
AlgorithmicEnv.min_length = starting_min_length
|
||||
# Three sub-actions:
|
||||
# 1. Move read head left or right (or up/down)
|
||||
# 2. Write or not
|
||||
# 3. Which character to write. (Ignored if should_write=0)
|
||||
self.action_space = Tuple(
|
||||
[Discrete(len(self.MOVEMENTS)), Discrete(2), Discrete(self.base)]
|
||||
)
|
||||
# Can see just what is on the input tape (one of n characters, or nothing)
|
||||
self.observation_space = Discrete(self.base + 1)
|
||||
self.seed()
|
||||
self.reset()
|
||||
|
||||
@classmethod
|
||||
def _movement_idx(kls, movement_name):
|
||||
return kls.MOVEMENTS.index(movement_name)
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def _get_obs(self, pos=None):
|
||||
"""Return an observation corresponding to the given read head position
|
||||
(or the current read head position, if none is given)."""
|
||||
raise NotImplementedError
|
||||
|
||||
def _get_str_obs(self, pos=None):
|
||||
ret = self._get_obs(pos)
|
||||
return self.charmap[ret]
|
||||
|
||||
def _get_str_target(self, pos):
|
||||
"""Return the ith character of the target string (or " " if index
|
||||
out of bounds)."""
|
||||
if pos < 0 or len(self.target) <= pos:
|
||||
return " "
|
||||
else:
|
||||
return self.charmap[self.target[pos]]
|
||||
|
||||
def render_observation(self):
|
||||
"""Return a string representation of the input tape/grid."""
|
||||
raise NotImplementedError
|
||||
|
||||
def render(self, mode='human'):
|
||||
|
||||
outfile = StringIO() if mode == 'ansi' else sys.stdout
|
||||
inp = "Total length of input instance: %d, step: %d\n" % (self.input_width, self.time)
|
||||
outfile.write(inp)
|
||||
x, y, action = self.read_head_position, self.write_head_position, self.last_action
|
||||
if action is not None:
|
||||
inp_act, out_act, pred = action
|
||||
outfile.write("=" * (len(inp) - 1) + "\n")
|
||||
y_str = "Output Tape : "
|
||||
target_str = "Targets : "
|
||||
if action is not None:
|
||||
pred_str = self.charmap[pred]
|
||||
x_str = self.render_observation()
|
||||
for i in range(-2, len(self.target) + 2):
|
||||
target_str += self._get_str_target(i)
|
||||
if i < y - 1:
|
||||
y_str += self._get_str_target(i)
|
||||
elif i == (y - 1):
|
||||
if action is not None and out_act == 1:
|
||||
color = 'green' if pred == self.target[i] else 'red'
|
||||
y_str += colorize(pred_str, color, highlight=True)
|
||||
else:
|
||||
y_str += self._get_str_target(i)
|
||||
outfile.write(x_str)
|
||||
outfile.write(y_str + "\n")
|
||||
outfile.write(target_str + "\n\n")
|
||||
|
||||
if action is not None:
|
||||
outfile.write("Current reward : %.3f\n" % self.last_reward)
|
||||
outfile.write("Cumulative reward : %.3f\n" % self.episode_total_reward)
|
||||
move = self.MOVEMENTS[inp_act]
|
||||
outfile.write("Action : Tuple(move over input: %s,\n" % move)
|
||||
out_act = out_act == 1
|
||||
outfile.write(" write to the output tape: %s,\n" % out_act)
|
||||
outfile.write(" prediction: %s)\n" % pred_str)
|
||||
else:
|
||||
outfile.write("\n" * 5)
|
||||
return outfile
|
||||
|
||||
@property
|
||||
def input_width(self):
|
||||
return len(self.input_data)
|
||||
|
||||
def step(self, action):
|
||||
assert self.action_space.contains(action)
|
||||
self.last_action = action
|
||||
inp_act, out_act, pred = action
|
||||
done = False
|
||||
reward = 0.0
|
||||
self.time += 1
|
||||
assert 0 <= self.write_head_position
|
||||
if out_act == 1:
|
||||
try:
|
||||
correct = pred == self.target[self.write_head_position]
|
||||
except IndexError:
|
||||
logger.warn("It looks like you're calling step() even though this "+
|
||||
"environment has already returned done=True. You should always call "+
|
||||
"reset() once you receive done=True. Any further steps are undefined "+
|
||||
"behaviour.")
|
||||
correct = False
|
||||
if correct:
|
||||
reward = 1.0
|
||||
else:
|
||||
# Bail as soon as a wrong character is written to the tape
|
||||
reward = -0.5
|
||||
done = True
|
||||
self.write_head_position += 1
|
||||
if self.write_head_position >= len(self.target):
|
||||
done = True
|
||||
self._move(inp_act)
|
||||
if self.time > self.time_limit:
|
||||
reward = -1.0
|
||||
done = True
|
||||
obs = self._get_obs()
|
||||
self.last_reward = reward
|
||||
self.episode_total_reward += reward
|
||||
return (obs, reward, done, {})
|
||||
|
||||
@property
|
||||
def time_limit(self):
|
||||
"""If an agent takes more than this many timesteps, end the episode
|
||||
immediately and return a negative reward."""
|
||||
# (Seemingly arbitrary)
|
||||
return self.input_width + len(self.target) + 4
|
||||
|
||||
def _check_levelup(self):
|
||||
"""Called between episodes. Update our running record of episode rewards
|
||||
and, if appropriate, 'level up' minimum input length."""
|
||||
if self.episode_total_reward is None:
|
||||
# This is before the first episode/call to reset(). Nothing to do
|
||||
return
|
||||
AlgorithmicEnv.reward_shortfalls.append(self.episode_total_reward - len(self.target))
|
||||
AlgorithmicEnv.reward_shortfalls = AlgorithmicEnv.reward_shortfalls[-self.last:]
|
||||
if len(AlgorithmicEnv.reward_shortfalls) == self.last and \
|
||||
min(AlgorithmicEnv.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION and \
|
||||
AlgorithmicEnv.min_length < 30:
|
||||
AlgorithmicEnv.min_length += 1
|
||||
AlgorithmicEnv.reward_shortfalls = []
|
||||
|
||||
|
||||
def reset(self):
|
||||
self._check_levelup()
|
||||
self.last_action = None
|
||||
self.last_reward = 0
|
||||
self.read_head_position = self.READ_HEAD_START
|
||||
self.write_head_position = 0
|
||||
self.episode_total_reward = 0.0
|
||||
self.time = 0
|
||||
length = self.np_random.randint(3) + AlgorithmicEnv.min_length
|
||||
self.input_data = self.generate_input_data(length)
|
||||
self.target = self.target_from_input_data(self.input_data)
|
||||
return self._get_obs()
|
||||
|
||||
def generate_input_data(self, size):
|
||||
raise NotImplementedError
|
||||
|
||||
def target_from_input_data(self, input_data):
|
||||
raise NotImplementedError("Subclasses must implement")
|
||||
|
||||
def _move(self, movement):
|
||||
raise NotImplementedError
|
||||
|
||||
class TapeAlgorithmicEnv(AlgorithmicEnv):
|
||||
"""An algorithmic env with a 1-d input tape."""
|
||||
MOVEMENTS = ['left', 'right']
|
||||
READ_HEAD_START = 0
|
||||
|
||||
def _move(self, movement):
|
||||
named = self.MOVEMENTS[movement]
|
||||
self.read_head_position += 1 if named == 'right' else -1
|
||||
|
||||
def _get_obs(self, pos=None):
|
||||
if pos is None:
|
||||
pos = self.read_head_position
|
||||
if pos < 0:
|
||||
return self.base
|
||||
if isinstance(pos, np.ndarray):
|
||||
pos = pos.item()
|
||||
try:
|
||||
return self.input_data[pos]
|
||||
except IndexError:
|
||||
return self.base
|
||||
|
||||
def generate_input_data(self, size):
|
||||
return [self.np_random.randint(self.base) for _ in range(size)]
|
||||
|
||||
def render_observation(self):
|
||||
x = self.read_head_position
|
||||
x_str = "Observation Tape : "
|
||||
for i in range(-2, self.input_width + 2):
|
||||
if i == x:
|
||||
x_str += colorize(self._get_str_obs(np.array([i])), 'green', highlight=True)
|
||||
else:
|
||||
x_str += self._get_str_obs(np.array([i]))
|
||||
x_str += "\n"
|
||||
return x_str
|
||||
|
||||
class GridAlgorithmicEnv(AlgorithmicEnv):
|
||||
"""An algorithmic env with a 2-d input grid."""
|
||||
MOVEMENTS = ['left', 'right', 'up', 'down']
|
||||
READ_HEAD_START = (0, 0)
|
||||
def __init__(self, rows, *args, **kwargs):
|
||||
self.rows = rows
|
||||
AlgorithmicEnv.__init__(self, *args, **kwargs)
|
||||
|
||||
def _move(self, movement):
|
||||
named = self.MOVEMENTS[movement]
|
||||
x, y = self.read_head_position
|
||||
if named == 'left':
|
||||
x -= 1
|
||||
elif named == 'right':
|
||||
x += 1
|
||||
elif named == 'up':
|
||||
y -= 1
|
||||
elif named == 'down':
|
||||
y += 1
|
||||
else:
|
||||
raise ValueError("Unrecognized direction: {}".format(named))
|
||||
self.read_head_position = x, y
|
||||
|
||||
def generate_input_data(self, size):
|
||||
return [
|
||||
[self.np_random.randint(self.base) for _ in range(self.rows)]
|
||||
for __ in range(size)
|
||||
]
|
||||
|
||||
def _get_obs(self, pos=None):
|
||||
if pos is None:
|
||||
pos = self.read_head_position
|
||||
x, y = pos
|
||||
if any(idx < 0 for idx in pos):
|
||||
return self.base
|
||||
try:
|
||||
return self.input_data[x][y]
|
||||
except IndexError:
|
||||
return self.base
|
||||
|
||||
def render_observation(self):
|
||||
x = self.read_head_position
|
||||
label = "Observation Grid : "
|
||||
x_str = ""
|
||||
for j in range(-1, self.rows+1):
|
||||
if j != -1:
|
||||
x_str += " " * len(label)
|
||||
for i in range(-2, self.input_width + 2):
|
||||
if i == x[0] and j == x[1]:
|
||||
x_str += colorize(self._get_str_obs((i, j)), 'green', highlight=True)
|
||||
else:
|
||||
x_str += self._get_str_obs((i, j))
|
||||
x_str += "\n"
|
||||
x_str = label + x_str
|
||||
return x_str
|
@ -0,0 +1,13 @@
|
||||
"""
|
||||
Task is to copy content from the input tape to
|
||||
the output tape. http://arxiv.org/abs/1511.07275
|
||||
"""
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
|
||||
class CopyEnv(algorithmic_env.TapeAlgorithmicEnv):
|
||||
def __init__(self, base=5, chars=True):
|
||||
super(CopyEnv, self).__init__(base=base, chars=chars)
|
||||
|
||||
def target_from_input_data(self, input_data):
|
||||
return input_data
|
||||
|
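# A minimal interaction sketch (an illustration, not part of the original file):
# for the copy task the optimal policy is to write whatever character is under
# the read head and then move right. It assumes the action layout used above,
# i.e. (move over input, write to output tape, prediction).
if __name__ == "__main__":
    env = CopyEnv(base=5)
    obs = env.reset()
    done, total = False, 0.0
    while not done:
        # 1 = move right (MOVEMENTS index), 1 = write, prediction = current observation
        obs, reward, done, _ = env.step((1, 1, int(obs)))
        total += reward
    print("episode reward:", total)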
@ -0,0 +1,24 @@
|
||||
"""
|
||||
Task is to return every nth character from the input tape.
|
||||
http://arxiv.org/abs/1511.07275
|
||||
"""
|
||||
from __future__ import division
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
|
||||
class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
|
||||
def __init__(self, duplication=2, base=5):
|
||||
self.duplication = duplication
|
||||
super(DuplicatedInputEnv, self).__init__(base=base, chars=True)
|
||||
|
||||
def generate_input_data(self, size):
|
||||
res = []
|
||||
if size < self.duplication:
|
||||
size = self.duplication
|
||||
for i in range(size//self.duplication):
|
||||
char = self.np_random.randint(self.base)
|
||||
for _ in range(self.duplication):
|
||||
res.append(char)
|
||||
return res
|
||||
|
||||
def target_from_input_data(self, input_data):
|
||||
return [input_data[i] for i in range(0, len(input_data), self.duplication)]
|
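# A small worked example (illustrative only): with duplication=2, every pair of
# repeated characters on the input collapses to a single character on the target.
if __name__ == "__main__":
    env = DuplicatedInputEnv(duplication=2, base=5)
    assert env.target_from_input_data([3, 3, 1, 1, 4, 4]) == [3, 1, 4]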
@ -0,0 +1,15 @@
|
||||
"""
|
||||
Task is to copy content multiple times from the input tape to
|
||||
the output tape. http://arxiv.org/abs/1511.07275
|
||||
"""
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
|
||||
class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
|
||||
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
|
||||
def __init__(self, base=5):
|
||||
super(RepeatCopyEnv, self).__init__(base=base, chars=True)
|
||||
self.last = 50
|
||||
|
||||
def target_from_input_data(self, input_data):
|
||||
return input_data + list(reversed(input_data)) + input_data
|
||||
|
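# A small worked example (illustrative only): the target is the input, then the
# input reversed, then the input again.
if __name__ == "__main__":
    env = RepeatCopyEnv(base=5)
    assert env.target_from_input_data([0, 1, 2]) == [0, 1, 2, 2, 1, 0, 0, 1, 2]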
@ -0,0 +1,15 @@
|
||||
"""
|
||||
Task is to reverse content over the input tape.
|
||||
http://arxiv.org/abs/1511.07275
|
||||
"""
|
||||
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
|
||||
class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
|
||||
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
|
||||
def __init__(self, base=2):
|
||||
super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
|
||||
self.last = 50
|
||||
|
||||
def target_from_input_data(self, input_str):
|
||||
return list(reversed(input_str))
|
@ -0,0 +1,30 @@
|
||||
from __future__ import division
|
||||
import numpy as np
|
||||
from gym.envs.algorithmic import algorithmic_env
|
||||
|
||||
class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
|
||||
def __init__(self, rows=2, base=3):
|
||||
super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)
|
||||
|
||||
def target_from_input_data(self, input_strings):
|
||||
carry = 0
|
||||
target = []
|
||||
for digits in input_strings:
|
||||
total = sum(digits) + carry
|
||||
target.append(total % self.base)
|
||||
carry = total // self.base
|
||||
|
||||
if carry > 0:
|
||||
target.append(carry)
|
||||
return target
|
||||
|
||||
@property
|
||||
def time_limit(self):
|
||||
# Quirk preserved for the sake of consistency: add the length of the input
|
||||
# rather than the length of the desired output (which may differ if there's
|
||||
# an extra carried digit).
|
||||
# TODO: It seems like this time limit is so strict as to make Addition3-v0
|
||||
# unsolvable, since agents aren't even given enough time steps to look at
|
||||
# all the digits. (The solutions on the scoreboard seem to only work by
|
||||
# save-scumming.)
|
||||
return self.input_width*2 + 4
|
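# A worked example (illustrative only): digits are stored least-significant
# column first, so [[1, 2], [2, 2]] in base 3 encodes 7 + 8 = 15, whose
# reversed base-3 digits are [0, 2, 1] (note the extra carried digit).
if __name__ == "__main__":
    env = ReversedAdditionEnv(rows=2, base=3)
    assert env.target_from_input_data([[1, 2], [2, 2]]) == [0, 2, 1]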
@ -0,0 +1,239 @@
|
||||
from gym.envs import algorithmic as alg
|
||||
import unittest
|
||||
|
||||
# All concrete subclasses of AlgorithmicEnv
|
||||
ALL_ENVS = [
|
||||
alg.copy_.CopyEnv,
|
||||
alg.duplicated_input.DuplicatedInputEnv,
|
||||
alg.repeat_copy.RepeatCopyEnv,
|
||||
alg.reverse.ReverseEnv,
|
||||
alg.reversed_addition.ReversedAdditionEnv,
|
||||
]
|
||||
ALL_TAPE_ENVS = [env for env in ALL_ENVS
|
||||
if issubclass(env, alg.algorithmic_env.TapeAlgorithmicEnv)]
|
||||
ALL_GRID_ENVS = [env for env in ALL_ENVS
|
||||
if issubclass(env, alg.algorithmic_env.GridAlgorithmicEnv)]
|
||||
|
||||
def imprint(env, input_arr):
|
||||
"""Monkey-patch the given environment so that when reset() is called, the
|
||||
input tape/grid will be set to the given data, rather than being randomly
|
||||
generated."""
|
||||
env.generate_input_data = lambda _: input_arr
|
||||
|
||||
class TestAlgorithmicEnvInteractions(unittest.TestCase):
|
||||
"""Test some generic behaviour not specific to any particular algorithmic
|
||||
environment. Movement, allocation of rewards, etc."""
|
||||
CANNED_INPUT = [0, 1]
|
||||
ENV_KLS = alg.copy_.CopyEnv
|
||||
LEFT, RIGHT = ENV_KLS._movement_idx('left'), ENV_KLS._movement_idx('right')
|
||||
def setUp(self):
|
||||
self.env = self.ENV_KLS(base=2, chars=True)
|
||||
imprint(self.env, self.CANNED_INPUT)
|
||||
|
||||
def test_successful_interaction(self):
|
||||
obs = self.env.reset()
|
||||
self.assertEqual(obs, 0)
|
||||
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
|
||||
self.assertEqual(obs, 1)
|
||||
self.assertGreater(reward, 0)
|
||||
self.assertFalse(done)
|
||||
obs, reward, done, _ = self.env.step([self.LEFT, 1, 1])
|
||||
self.assertTrue(done)
|
||||
self.assertGreater(reward, 0)
|
||||
|
||||
def test_bad_output_fail_fast(self):
|
||||
obs = self.env.reset()
|
||||
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
|
||||
self.assertTrue(done)
|
||||
self.assertLess(reward, 0)
|
||||
|
||||
def test_levelup(self):
|
||||
obs = self.env.reset()
|
||||
# Kind of a hack
|
||||
alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls = []
|
||||
min_length = self.env.min_length
|
||||
for i in range(self.env.last):
|
||||
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
|
||||
self.assertFalse(done)
|
||||
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
|
||||
self.assertTrue(done)
|
||||
self.env.reset()
|
||||
if i < self.env.last-1:
|
||||
self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), i+1)
|
||||
else:
|
||||
# Should have leveled up on the last iteration
|
||||
self.assertEqual(self.env.min_length, min_length+1)
|
||||
self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), 0)
|
||||
|
||||
def test_walk_off_the_end(self):
|
||||
obs = self.env.reset()
|
||||
# Walk off the end
|
||||
obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
|
||||
self.assertEqual(obs, self.env.base)
|
||||
self.assertEqual(r, 0)
|
||||
self.assertFalse(done)
|
||||
# Walk further off track
|
||||
obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
|
||||
self.assertEqual(obs, self.env.base)
|
||||
self.assertFalse(done)
|
||||
# Return to the first input character
|
||||
obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
|
||||
self.assertEqual(obs, self.env.base)
|
||||
self.assertFalse(done)
|
||||
obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
|
||||
self.assertEqual(obs, 0)
|
||||
|
||||
def test_grid_navigation(self):
|
||||
env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=6)
|
||||
N,S,E,W = [env._movement_idx(named_dir) for named_dir in ['up', 'down', 'right', 'left']]
|
||||
# Corresponds to a grid that looks like...
|
||||
# 0 1 2
|
||||
# 3 4 5
|
||||
canned = [ [0, 3], [1, 4], [2, 5] ]
|
||||
imprint(env, canned)
|
||||
obs = env.reset()
|
||||
self.assertEqual(obs, 0)
|
||||
navigation = [
|
||||
(S, 3), (N, 0), (E, 1), (S, 4), (S, 6), (E, 6), (N, 5), (N, 2), (W, 1)
|
||||
]
|
||||
for (movement, expected_obs) in navigation:
|
||||
obs, reward, done, _ = env.step([movement, 0, 0])
|
||||
self.assertEqual(reward, 0)
|
||||
self.assertFalse(done)
|
||||
self.assertEqual(obs, expected_obs)
|
||||
|
||||
def test_grid_success(self):
|
||||
env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=3)
|
||||
canned = [ [1, 2], [1, 0], [2, 2] ]
|
||||
imprint(env, canned)
|
||||
obs = env.reset()
|
||||
target = [0, 2, 1, 1]
|
||||
self.assertEqual(env.target, target)
|
||||
self.assertEqual(obs, 1)
|
||||
for i, target_digit in enumerate(target):
|
||||
obs, reward, done, _ = env.step([0, 1, target_digit])
|
||||
self.assertGreater(reward, 0)
|
||||
self.assertEqual(done, i==len(target)-1)
|
||||
|
||||
def test_sane_time_limit(self):
|
||||
obs = self.env.reset()
|
||||
self.assertLess(self.env.time_limit, 100)
|
||||
for _ in range(100):
|
||||
obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
|
||||
if done:
|
||||
return
|
||||
self.fail("Time limit wasn't enforced")
|
||||
|
||||
def test_rendering(self):
|
||||
env = self.env
|
||||
obs = env.reset()
|
||||
self.assertEqual(env._get_str_obs(), 'A')
|
||||
self.assertEqual(env._get_str_obs(1), 'B')
|
||||
self.assertEqual(env._get_str_obs(-1), ' ')
|
||||
self.assertEqual(env._get_str_obs(2), ' ')
|
||||
self.assertEqual(env._get_str_target(0), 'A')
|
||||
self.assertEqual(env._get_str_target(1), 'B')
|
||||
# Test numerical alphabet rendering
|
||||
env = self.ENV_KLS(base=3, chars=False)
|
||||
imprint(env, self.CANNED_INPUT)
|
||||
env.reset()
|
||||
self.assertEqual(env._get_str_obs(), '0')
|
||||
self.assertEqual(env._get_str_obs(1), '1')
|
||||
|
||||
|
||||
class TestTargets(unittest.TestCase):
|
||||
"""Test the rules mapping input strings/grids to target outputs."""
|
||||
def test_reverse_target(self):
|
||||
input_expected = [
|
||||
([0], [0]),
|
||||
([0, 1], [1, 0]),
|
||||
([1, 1], [1, 1]),
|
||||
([1, 0, 1], [1, 0, 1]),
|
||||
([0, 0, 1, 1], [1, 1, 0, 0]),
|
||||
]
|
||||
env = alg.reverse.ReverseEnv()
|
||||
for input_arr, expected in input_expected:
|
||||
target = env.target_from_input_data(input_arr)
|
||||
self.assertEqual(target, expected)
|
||||
|
||||
def test_reversed_addition_target(self):
|
||||
env = alg.reversed_addition.ReversedAdditionEnv(base=3)
|
||||
input_expected = [
|
||||
([[1,1], [1,1]], [2, 2]),
|
||||
([[2,2], [0,1]], [1, 2]),
|
||||
([[2,1], [1,1], [1,1], [1,0]], [0, 0, 0, 2]),
|
||||
]
|
||||
for (input_grid, expected_target) in input_expected:
|
||||
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
|
||||
|
||||
def test_reversed_addition_3rows(self):
|
||||
env = alg.reversed_addition.ReversedAdditionEnv(base=3, rows=3)
|
||||
input_expected = [
|
||||
([[1,1,0],[0,1,1]], [2, 2]),
|
||||
([[1,1,2],[0,1,1]], [1,0,1]),
|
||||
]
|
||||
for (input_grid, expected_target) in input_expected:
|
||||
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
|
||||
|
||||
def test_copy_target(self):
|
||||
env = alg.copy_.CopyEnv()
|
||||
self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2])
|
||||
|
||||
def test_duplicated_input_target(self):
|
||||
env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
|
||||
self.assertEqual(env.target_from_input_data([0, 0, 0, 0, 1, 1]), [0, 0, 1])
|
||||
|
||||
def test_repeat_copy_target(self):
|
||||
env = alg.repeat_copy.RepeatCopyEnv()
|
||||
self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2, 2, 1, 0, 0, 1, 2])
|
||||
|
||||
class TestInputGeneration(unittest.TestCase):
|
||||
"""Test random input generation.
|
||||
"""
|
||||
def test_tape_inputs(self):
|
||||
for env_kls in ALL_TAPE_ENVS:
|
||||
env = env_kls()
|
||||
for size in range(2,5):
|
||||
input_tape = env.generate_input_data(size)
|
||||
self.assertTrue(all(0<=x<=env.base for x in input_tape),
|
||||
"Invalid input tape from env {}: {}".format(env_kls, input_tape))
|
||||
# DuplicatedInput needs to generate inputs with even length,
|
||||
# so it may be short one
|
||||
self.assertLessEqual(len(input_tape), size)
|
||||
|
||||
def test_grid_inputs(self):
|
||||
for env_kls in ALL_GRID_ENVS:
|
||||
env = env_kls()
|
||||
for size in range(2, 5):
|
||||
input_grid = env.generate_input_data(size)
|
||||
# Should get "size" sublists, each of length self.rows (not the
|
||||
# opposite, as you might expect)
|
||||
self.assertEqual(len(input_grid), size)
|
||||
self.assertTrue(all(len(col) == env.rows for col in input_grid))
|
||||
self.assertTrue(all(0<=x<=env.base for x in input_grid[0]))
|
||||
|
||||
def test_duplicatedinput_inputs(self):
|
||||
"""The duplicated_input env needs to generate strings with the appropriate
|
||||
amount of repetition."""
|
||||
env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
|
||||
input_tape = env.generate_input_data(4)
|
||||
self.assertEqual(len(input_tape), 4)
|
||||
self.assertEqual(input_tape[0], input_tape[1])
|
||||
self.assertEqual(input_tape[2], input_tape[3])
|
||||
# If requested input size isn't a multiple of duplication, go lower
|
||||
input_tape = env.generate_input_data(3)
|
||||
self.assertEqual(len(input_tape), 2)
|
||||
self.assertEqual(input_tape[0], input_tape[1])
|
||||
# If requested input size is *less than* duplication, go up
|
||||
input_tape = env.generate_input_data(1)
|
||||
self.assertEqual(len(input_tape), 2)
|
||||
self.assertEqual(input_tape[0], input_tape[1])
|
||||
|
||||
env = alg.duplicated_input.DuplicatedInputEnv(duplication=3)
|
||||
input_tape = env.generate_input_data(6)
|
||||
self.assertEqual(len(input_tape), 6)
|
||||
self.assertEqual(input_tape[0], input_tape[1])
|
||||
self.assertEqual(input_tape[1], input_tape[2])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1 @@
|
||||
from gym.envs.atari.atari_env import AtariEnv
|
@ -0,0 +1,194 @@
|
||||
import numpy as np
|
||||
import os
|
||||
import gym
|
||||
from gym import error, spaces
|
||||
from gym import utils
|
||||
from gym.utils import seeding
|
||||
|
||||
try:
|
||||
import atari_py
|
||||
except ImportError as e:
|
||||
raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)".format(e))
|
||||
|
||||
def to_ram(ale):
|
||||
ram_size = ale.getRAMSize()
|
||||
ram = np.zeros((ram_size),dtype=np.uint8)
|
||||
ale.getRAM(ram)
|
||||
return ram
|
||||
|
||||
class AtariEnv(gym.Env, utils.EzPickle):
|
||||
metadata = {'render.modes': ['human', 'rgb_array']}
|
||||
|
||||
def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5),
|
||||
repeat_action_probability=0., full_action_space=False):
|
||||
"""Frameskip should be either a tuple (indicating a random range to
|
||||
choose from, with the top value excluded), or an int."""
|
||||
|
||||
utils.EzPickle.__init__(self, game, obs_type, frameskip, repeat_action_probability)
|
||||
assert obs_type in ('ram', 'image')
|
||||
|
||||
self.game_path = atari_py.get_game_path(game)
|
||||
if not os.path.exists(self.game_path):
|
||||
raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
|
||||
self._obs_type = obs_type
|
||||
self.frameskip = frameskip
|
||||
self.ale = atari_py.ALEInterface()
|
||||
self.viewer = None
|
||||
|
||||
# Tune (or disable) ALE's action repeat:
|
||||
# https://github.com/openai/gym/issues/349
|
||||
assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
|
||||
self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)
|
||||
|
||||
self.seed()
|
||||
|
||||
self._action_set = (self.ale.getLegalActionSet() if full_action_space
|
||||
else self.ale.getMinimalActionSet())
|
||||
self.action_space = spaces.Discrete(len(self._action_set))
|
||||
|
||||
(screen_width,screen_height) = self.ale.getScreenDims()
|
||||
if self._obs_type == 'ram':
|
||||
self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
|
||||
elif self._obs_type == 'image':
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
|
||||
else:
|
||||
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed1 = seeding.np_random(seed)
|
||||
# Derive a random seed. This gets passed as a uint, but gets
|
||||
# checked as an int elsewhere, so we need to keep it below
|
||||
# 2**31.
|
||||
seed2 = seeding.hash_seed(seed1 + 1) % 2**31
|
||||
# Empirically, we need to seed before loading the ROM.
|
||||
self.ale.setInt(b'random_seed', seed2)
|
||||
self.ale.loadROM(self.game_path)
|
||||
return [seed1, seed2]
|
||||
|
||||
def step(self, a):
|
||||
reward = 0.0
|
||||
action = self._action_set[a]
|
||||
|
||||
if isinstance(self.frameskip, int):
|
||||
num_steps = self.frameskip
|
||||
else:
|
||||
num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
|
||||
for _ in range(num_steps):
|
||||
reward += self.ale.act(action)
|
||||
ob = self._get_obs()
|
||||
|
||||
return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}
|
||||
|
||||
def _get_image(self):
|
||||
return self.ale.getScreenRGB2()
|
||||
|
||||
def _get_ram(self):
|
||||
return to_ram(self.ale)
|
||||
|
||||
@property
|
||||
def _n_actions(self):
|
||||
return len(self._action_set)
|
||||
|
||||
def _get_obs(self):
|
||||
if self._obs_type == 'ram':
|
||||
return self._get_ram()
|
||||
elif self._obs_type == 'image':
|
||||
img = self._get_image()
|
||||
return img
|
||||
|
||||
# return: (states, observations)
|
||||
def reset(self):
|
||||
self.ale.reset_game()
|
||||
return self._get_obs()
|
||||
|
||||
def render(self, mode='human'):
|
||||
img = self._get_image()
|
||||
if mode == 'rgb_array':
|
||||
return img
|
||||
elif mode == 'human':
|
||||
from gym.envs.classic_control import rendering
|
||||
if self.viewer is None:
|
||||
self.viewer = rendering.SimpleImageViewer()
|
||||
self.viewer.imshow(img)
|
||||
return self.viewer.isopen
|
||||
|
||||
def close(self):
|
||||
if self.viewer is not None:
|
||||
self.viewer.close()
|
||||
self.viewer = None
|
||||
|
||||
def get_action_meanings(self):
|
||||
return [ACTION_MEANING[i] for i in self._action_set]
|
||||
|
||||
def get_keys_to_action(self):
|
||||
KEYWORD_TO_KEY = {
|
||||
'UP': ord('w'),
|
||||
'DOWN': ord('s'),
|
||||
'LEFT': ord('a'),
|
||||
'RIGHT': ord('d'),
|
||||
'FIRE': ord(' '),
|
||||
}
|
||||
|
||||
keys_to_action = {}
|
||||
|
||||
for action_id, action_meaning in enumerate(self.get_action_meanings()):
|
||||
keys = []
|
||||
for keyword, key in KEYWORD_TO_KEY.items():
|
||||
if keyword in action_meaning:
|
||||
keys.append(key)
|
||||
keys = tuple(sorted(keys))
|
||||
|
||||
assert keys not in keys_to_action
|
||||
keys_to_action[keys] = action_id
|
||||
|
||||
return keys_to_action
|
||||
|
||||
def clone_state(self):
|
||||
"""Clone emulator state w/o system state. Restoring this state will
|
||||
*not* give an identical environment. For complete cloning and restoring
|
||||
of the full state, see `{clone,restore}_full_state()`."""
|
||||
state_ref = self.ale.cloneState()
|
||||
state = self.ale.encodeState(state_ref)
|
||||
self.ale.deleteState(state_ref)
|
||||
return state
|
||||
|
||||
def restore_state(self, state):
|
||||
"""Restore emulator state w/o system state."""
|
||||
state_ref = self.ale.decodeState(state)
|
||||
self.ale.restoreState(state_ref)
|
||||
self.ale.deleteState(state_ref)
|
||||
|
||||
def clone_full_state(self):
|
||||
"""Clone emulator state w/ system state including pseudorandomness.
|
||||
Restoring this state will give an identical environment."""
|
||||
state_ref = self.ale.cloneSystemState()
|
||||
state = self.ale.encodeState(state_ref)
|
||||
self.ale.deleteState(state_ref)
|
||||
return state
|
||||
|
||||
def restore_full_state(self, state):
|
||||
"""Restore emulator state w/ system state including pseudorandomness."""
|
||||
state_ref = self.ale.decodeState(state)
|
||||
self.ale.restoreSystemState(state_ref)
|
||||
self.ale.deleteState(state_ref)
|
||||
|
||||
ACTION_MEANING = {
|
||||
0 : "NOOP",
|
||||
1 : "FIRE",
|
||||
2 : "UP",
|
||||
3 : "RIGHT",
|
||||
4 : "LEFT",
|
||||
5 : "DOWN",
|
||||
6 : "UPRIGHT",
|
||||
7 : "UPLEFT",
|
||||
8 : "DOWNRIGHT",
|
||||
9 : "DOWNLEFT",
|
||||
10 : "UPFIRE",
|
||||
11 : "RIGHTFIRE",
|
||||
12 : "LEFTFIRE",
|
||||
13 : "DOWNFIRE",
|
||||
14 : "UPRIGHTFIRE",
|
||||
15 : "UPLEFTFIRE",
|
||||
16 : "DOWNRIGHTFIRE",
|
||||
17 : "DOWNLEFTFIRE",
|
||||
}
|
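# A minimal usage sketch (illustrative; it requires atari_py plus the Pong ROM
# to actually run): step with random actions and use the full-state clone /
# restore pair defined above to rewind the emulator deterministically.
if __name__ == "__main__":
    env = AtariEnv(game='pong', obs_type='image', frameskip=4)
    env.reset()
    snapshot = env.clone_full_state()   # includes emulator pseudorandomness
    for _ in range(10):
        env.step(env.action_space.sample())
    env.restore_full_state(snapshot)    # rewind; replaying the same actions from
                                        # here reproduces the same frames
    print("available actions:", env.get_action_meanings())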
@ -0,0 +1,4 @@
|
||||
from gym.envs.box2d.lunar_lander import LunarLander
|
||||
from gym.envs.box2d.lunar_lander import LunarLanderContinuous
|
||||
from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
|
||||
from gym.envs.box2d.car_racing import CarRacing
|
@ -0,0 +1,582 @@
|
||||
import sys
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
import Box2D
|
||||
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
|
||||
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import colorize, seeding, EzPickle
|
||||
|
||||
# This is a simple 4-joint walker robot environment.
|
||||
#
|
||||
# There are two versions:
|
||||
#
|
||||
# - Normal, with slightly uneven terrain.
|
||||
#
|
||||
# - Hardcore with ladders, stumps, pitfalls.
|
||||
#
|
||||
# Reward is given for moving forward, total 300+ points up to the far end. If the robot falls,
|
||||
# it gets -100. Applying motor torque costs a small amount of points; a more optimal agent
|
||||
# will get a better score.
|
||||
#
|
||||
# A heuristic is provided for testing; it's also useful for getting demonstrations to
|
||||
# learn from. To run the heuristic:
|
||||
#
|
||||
# python gym/envs/box2d/bipedal_walker.py
|
||||
#
|
||||
# State consists of hull angle speed, angular velocity, horizontal speed, vertical speed,
|
||||
# position of joints and joints angular speed, legs contact with ground, and 10 lidar
|
||||
# rangefinder measurements to help deal with the hardcore version. There are no coordinates
|
||||
# in the state vector. Lidar is less useful in normal version, but it works.
|
||||
#
|
||||
# To solve the game you need to get 300 points in 1600 time steps.
|
||||
#
|
||||
# To solve hardcore version you need 300 points in 2000 time steps.
|
||||
#
|
||||
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
|
||||
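#
# A minimal programmatic sketch (illustrative; assumes the usual registered id
# 'BipedalWalker-v2' for this version of gym):
#
#   import gym
#   env = gym.make('BipedalWalker-v2')
#   s = env.reset()                                   # 24-dim state described above
#   done = False
#   while not done:
#       s, r, done, info = env.step(env.action_space.sample())
#       env.render()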
|
||||
FPS = 50
|
||||
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
|
||||
|
||||
MOTORS_TORQUE = 80
|
||||
SPEED_HIP = 4
|
||||
SPEED_KNEE = 6
|
||||
LIDAR_RANGE = 160/SCALE
|
||||
|
||||
INITIAL_RANDOM = 5
|
||||
|
||||
HULL_POLY =[
|
||||
(-30,+9), (+6,+9), (+34,+1),
|
||||
(+34,-8), (-30,-8)
|
||||
]
|
||||
LEG_DOWN = -8/SCALE
|
||||
LEG_W, LEG_H = 8/SCALE, 34/SCALE
|
||||
|
||||
VIEWPORT_W = 600
|
||||
VIEWPORT_H = 400
|
||||
|
||||
TERRAIN_STEP = 14/SCALE
|
||||
TERRAIN_LENGTH = 200 # in steps
|
||||
TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4
|
||||
TERRAIN_GRASS = 10 # how long grass spots are, in steps
|
||||
TERRAIN_STARTPAD = 20 # in steps
|
||||
FRICTION = 2.5
|
||||
|
||||
HULL_FD = fixtureDef(
|
||||
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
|
||||
density=5.0,
|
||||
friction=0.1,
|
||||
categoryBits=0x0020,
|
||||
maskBits=0x001, # collide only with ground
|
||||
restitution=0.0) # 0.99 bouncy
|
||||
|
||||
LEG_FD = fixtureDef(
|
||||
shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
|
||||
density=1.0,
|
||||
restitution=0.0,
|
||||
categoryBits=0x0020,
|
||||
maskBits=0x001)
|
||||
|
||||
LOWER_FD = fixtureDef(
|
||||
shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
|
||||
density=1.0,
|
||||
restitution=0.0,
|
||||
categoryBits=0x0020,
|
||||
maskBits=0x001)
|
||||
|
||||
class ContactDetector(contactListener):
|
||||
def __init__(self, env):
|
||||
contactListener.__init__(self)
|
||||
self.env = env
|
||||
def BeginContact(self, contact):
|
||||
if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body:
|
||||
self.env.game_over = True
|
||||
for leg in [self.env.legs[1], self.env.legs[3]]:
|
||||
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
|
||||
leg.ground_contact = True
|
||||
def EndContact(self, contact):
|
||||
for leg in [self.env.legs[1], self.env.legs[3]]:
|
||||
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
|
||||
leg.ground_contact = False
|
||||
|
||||
class BipedalWalker(gym.Env, EzPickle):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : FPS
|
||||
}
|
||||
|
||||
hardcore = False
|
||||
|
||||
def __init__(self):
|
||||
EzPickle.__init__(self)
|
||||
self.seed()
|
||||
self.viewer = None
|
||||
|
||||
self.world = Box2D.b2World()
|
||||
self.terrain = None
|
||||
self.hull = None
|
||||
|
||||
self.prev_shaping = None
|
||||
|
||||
self.fd_polygon = fixtureDef(
|
||||
shape = polygonShape(vertices=
|
||||
[(0, 0),
|
||||
(1, 0),
|
||||
(1, -1),
|
||||
(0, -1)]),
|
||||
friction = FRICTION)
|
||||
|
||||
self.fd_edge = fixtureDef(
|
||||
shape = edgeShape(vertices=
|
||||
[(0, 0),
|
||||
(1, 1)]),
|
||||
friction = FRICTION,
|
||||
categoryBits=0x0001,
|
||||
)
|
||||
|
||||
self.reset()
|
||||
|
||||
high = np.array([np.inf] * 24)
|
||||
self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
|
||||
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def _destroy(self):
|
||||
if not self.terrain: return
|
||||
self.world.contactListener = None
|
||||
for t in self.terrain:
|
||||
self.world.DestroyBody(t)
|
||||
self.terrain = []
|
||||
self.world.DestroyBody(self.hull)
|
||||
self.hull = None
|
||||
for leg in self.legs:
|
||||
self.world.DestroyBody(leg)
|
||||
self.legs = []
|
||||
self.joints = []
|
||||
|
||||
def _generate_terrain(self, hardcore):
|
||||
GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
|
||||
state = GRASS
|
||||
velocity = 0.0
|
||||
y = TERRAIN_HEIGHT
|
||||
counter = TERRAIN_STARTPAD
|
||||
oneshot = False
|
||||
self.terrain = []
|
||||
self.terrain_x = []
|
||||
self.terrain_y = []
|
||||
for i in range(TERRAIN_LENGTH):
|
||||
x = i*TERRAIN_STEP
|
||||
self.terrain_x.append(x)
|
||||
|
||||
if state==GRASS and not oneshot:
|
||||
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
|
||||
if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE #1
|
||||
y += velocity
|
||||
|
||||
elif state==PIT and oneshot:
|
||||
counter = self.np_random.randint(3, 5)
|
||||
poly = [
|
||||
(x, y),
|
||||
(x+TERRAIN_STEP, y),
|
||||
(x+TERRAIN_STEP, y-4*TERRAIN_STEP),
|
||||
(x, y-4*TERRAIN_STEP),
|
||||
]
|
||||
self.fd_polygon.shape.vertices=poly
|
||||
t = self.world.CreateStaticBody(
|
||||
fixtures = self.fd_polygon)
|
||||
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
|
||||
self.terrain.append(t)
|
||||
|
||||
self.fd_polygon.shape.vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]
|
||||
t = self.world.CreateStaticBody(
|
||||
fixtures = self.fd_polygon)
|
||||
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
|
||||
self.terrain.append(t)
|
||||
counter += 2
|
||||
original_y = y
|
||||
|
||||
elif state==PIT and not oneshot:
|
||||
y = original_y
|
||||
if counter > 1:
|
||||
y -= 4*TERRAIN_STEP
|
||||
|
||||
elif state==STUMP and oneshot:
|
||||
counter = self.np_random.randint(1, 3)
|
||||
poly = [
|
||||
(x, y),
|
||||
(x+counter*TERRAIN_STEP, y),
|
||||
(x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
|
||||
(x, y+counter*TERRAIN_STEP),
|
||||
]
|
||||
self.fd_polygon.shape.vertices=poly
|
||||
t = self.world.CreateStaticBody(
|
||||
fixtures = self.fd_polygon)
|
||||
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
|
||||
self.terrain.append(t)
|
||||
|
||||
elif state==STAIRS and oneshot:
|
||||
stair_height = +1 if self.np_random.rand() > 0.5 else -1
|
||||
stair_width = self.np_random.randint(4, 5)
|
||||
stair_steps = self.np_random.randint(3, 5)
|
||||
original_y = y
|
||||
for s in range(stair_steps):
|
||||
poly = [
|
||||
(x+( s*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
|
||||
(x+((1+s)*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
|
||||
(x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
|
||||
(x+( s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
|
||||
]
|
||||
self.fd_polygon.shape.vertices=poly
|
||||
t = self.world.CreateStaticBody(
|
||||
fixtures = self.fd_polygon)
|
||||
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
|
||||
self.terrain.append(t)
|
||||
counter = stair_steps*stair_width
|
||||
|
||||
elif state==STAIRS and not oneshot:
|
||||
s = stair_steps*stair_width - counter - stair_height
|
||||
n = s/stair_width
|
||||
y = original_y + (n*stair_height)*TERRAIN_STEP
|
||||
|
||||
oneshot = False
|
||||
self.terrain_y.append(y)
|
||||
counter -= 1
|
||||
if counter==0:
|
||||
counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
|
||||
if state==GRASS and hardcore:
|
||||
state = self.np_random.randint(1, _STATES_)
|
||||
oneshot = True
|
||||
else:
|
||||
state = GRASS
|
||||
oneshot = True
|
||||
|
||||
self.terrain_poly = []
|
||||
for i in range(TERRAIN_LENGTH-1):
|
||||
poly = [
|
||||
(self.terrain_x[i], self.terrain_y[i]),
|
||||
(self.terrain_x[i+1], self.terrain_y[i+1])
|
||||
]
|
||||
self.fd_edge.shape.vertices=poly
|
||||
t = self.world.CreateStaticBody(
|
||||
fixtures = self.fd_edge)
|
||||
color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
|
||||
t.color1 = color
|
||||
t.color2 = color
|
||||
self.terrain.append(t)
|
||||
color = (0.4, 0.6, 0.3)
|
||||
poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
|
||||
self.terrain_poly.append( (poly, color) )
|
||||
self.terrain.reverse()
|
||||
|
||||
def _generate_clouds(self):
|
||||
# Sorry for the clouds, couldn't resist
|
||||
self.cloud_poly = []
|
||||
for i in range(TERRAIN_LENGTH//20):
|
||||
x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
|
||||
y = VIEWPORT_H/SCALE*3/4
|
||||
poly = [
|
||||
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
|
||||
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
|
||||
for a in range(5) ]
|
||||
x1 = min( [p[0] for p in poly] )
|
||||
x2 = max( [p[0] for p in poly] )
|
||||
self.cloud_poly.append( (poly,x1,x2) )
|
||||
|
||||
def reset(self):
|
||||
self._destroy()
|
||||
self.world.contactListener_bug_workaround = ContactDetector(self)
|
||||
self.world.contactListener = self.world.contactListener_bug_workaround
|
||||
self.game_over = False
|
||||
self.prev_shaping = None
|
||||
self.scroll = 0.0
|
||||
self.lidar_render = 0
|
||||
|
||||
W = VIEWPORT_W/SCALE
|
||||
H = VIEWPORT_H/SCALE
|
||||
|
||||
self._generate_terrain(self.hardcore)
|
||||
self._generate_clouds()
|
||||
|
||||
init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
|
||||
init_y = TERRAIN_HEIGHT+2*LEG_H
|
||||
self.hull = self.world.CreateDynamicBody(
|
||||
position = (init_x, init_y),
|
||||
fixtures = HULL_FD
|
||||
)
|
||||
self.hull.color1 = (0.5,0.4,0.9)
|
||||
self.hull.color2 = (0.3,0.3,0.5)
|
||||
self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
|
||||
|
||||
self.legs = []
|
||||
self.joints = []
|
||||
for i in [-1,+1]:
|
||||
leg = self.world.CreateDynamicBody(
|
||||
position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
|
||||
angle = (i*0.05),
|
||||
fixtures = LEG_FD
|
||||
)
|
||||
leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
|
||||
leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
|
||||
rjd = revoluteJointDef(
|
||||
bodyA=self.hull,
|
||||
bodyB=leg,
|
||||
localAnchorA=(0, LEG_DOWN),
|
||||
localAnchorB=(0, LEG_H/2),
|
||||
enableMotor=True,
|
||||
enableLimit=True,
|
||||
maxMotorTorque=MOTORS_TORQUE,
|
||||
motorSpeed = i,
|
||||
lowerAngle = -0.8,
|
||||
upperAngle = 1.1,
|
||||
)
|
||||
self.legs.append(leg)
|
||||
self.joints.append(self.world.CreateJoint(rjd))
|
||||
|
||||
lower = self.world.CreateDynamicBody(
|
||||
position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
|
||||
angle = (i*0.05),
|
||||
fixtures = LOWER_FD
|
||||
)
|
||||
lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
|
||||
lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
|
||||
rjd = revoluteJointDef(
|
||||
bodyA=leg,
|
||||
bodyB=lower,
|
||||
localAnchorA=(0, -LEG_H/2),
|
||||
localAnchorB=(0, LEG_H/2),
|
||||
enableMotor=True,
|
||||
enableLimit=True,
|
||||
maxMotorTorque=MOTORS_TORQUE,
|
||||
motorSpeed = 1,
|
||||
lowerAngle = -1.6,
|
||||
upperAngle = -0.1,
|
||||
)
|
||||
lower.ground_contact = False
|
||||
self.legs.append(lower)
|
||||
self.joints.append(self.world.CreateJoint(rjd))
|
||||
|
||||
self.drawlist = self.terrain + self.legs + [self.hull]
|
||||
|
||||
class LidarCallback(Box2D.b2.rayCastCallback):
|
||||
def ReportFixture(self, fixture, point, normal, fraction):
|
||||
if (fixture.filterData.categoryBits & 1) == 0:
|
||||
return 1
|
||||
self.p2 = point
|
||||
self.fraction = fraction
|
||||
return 0
|
||||
self.lidar = [LidarCallback() for _ in range(10)]
|
||||
|
||||
return self.step(np.array([0,0,0,0]))[0]
|
||||
|
||||
def step(self, action):
|
||||
#self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
|
||||
control_speed = False # Should be easier as well
|
||||
if control_speed:
|
||||
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
|
||||
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
|
||||
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
|
||||
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
|
||||
else:
|
||||
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
|
||||
self.joints[0].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1))
|
||||
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
|
||||
self.joints[1].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1))
|
||||
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
|
||||
self.joints[2].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1))
|
||||
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
|
||||
self.joints[3].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1))
|
||||
|
||||
self.world.Step(1.0/FPS, 6*30, 2*30)
|
||||
|
||||
pos = self.hull.position
|
||||
vel = self.hull.linearVelocity
|
||||
|
||||
for i in range(10):
|
||||
self.lidar[i].fraction = 1.0
|
||||
self.lidar[i].p1 = pos
|
||||
self.lidar[i].p2 = (
|
||||
pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
|
||||
pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
|
||||
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
|
||||
|
||||
state = [
|
||||
self.hull.angle, # Normal angles are up to 0.5 here, but more is possible.
|
||||
2.0*self.hull.angularVelocity/FPS,
|
||||
0.3*vel.x*(VIEWPORT_W/SCALE)/FPS, # Normalized to get -1..1 range
|
||||
0.3*vel.y*(VIEWPORT_H/SCALE)/FPS,
|
||||
self.joints[0].angle, # This can reach 1.1 when high up, but it's still OK (and there should be spikes on hitting the ground, that's normal too)
|
||||
self.joints[0].speed / SPEED_HIP,
|
||||
self.joints[1].angle + 1.0,
|
||||
self.joints[1].speed / SPEED_KNEE,
|
||||
1.0 if self.legs[1].ground_contact else 0.0,
|
||||
self.joints[2].angle,
|
||||
self.joints[2].speed / SPEED_HIP,
|
||||
self.joints[3].angle + 1.0,
|
||||
self.joints[3].speed / SPEED_KNEE,
|
||||
1.0 if self.legs[3].ground_contact else 0.0
|
||||
]
|
||||
state += [l.fraction for l in self.lidar]
|
||||
assert len(state)==24
|
||||
|
||||
self.scroll = pos.x - VIEWPORT_W/SCALE/5
|
||||
|
||||
shaping = 130*pos[0]/SCALE # moving forward is a way to receive reward (normalized to get 300 on completion)
|
||||
shaping -= 5.0*abs(state[0]) # keep head straight, other than that and falling, any behavior is unpunished
|
||||
|
||||
reward = 0
|
||||
if self.prev_shaping is not None:
|
||||
reward = shaping - self.prev_shaping
|
||||
self.prev_shaping = shaping
|
||||
|
||||
for a in action:
|
||||
reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
|
||||
# normalized to about -50.0 using the heuristic; a more optimal agent should spend less
|
||||
|
||||
done = False
|
||||
if self.game_over or pos[0] < 0:
|
||||
reward = -100
|
||||
done = True
|
||||
if pos[0] > (TERRAIN_LENGTH-TERRAIN_GRASS)*TERRAIN_STEP:
|
||||
done = True
|
||||
return np.array(state), reward, done, {}
|
||||
|
||||
def render(self, mode='human'):
|
||||
from gym.envs.classic_control import rendering
|
||||
if self.viewer is None:
|
||||
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
|
||||
self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
|
||||
|
||||
self.viewer.draw_polygon( [
|
||||
(self.scroll, 0),
|
||||
(self.scroll+VIEWPORT_W/SCALE, 0),
|
||||
(self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
|
||||
(self.scroll, VIEWPORT_H/SCALE),
|
||||
], color=(0.9, 0.9, 1.0) )
|
||||
for poly,x1,x2 in self.cloud_poly:
|
||||
if x2 < self.scroll/2: continue
|
||||
if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
|
||||
self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
|
||||
for poly, color in self.terrain_poly:
|
||||
if poly[1][0] < self.scroll: continue
|
||||
if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
|
||||
self.viewer.draw_polygon(poly, color=color)
|
||||
|
||||
self.lidar_render = (self.lidar_render+1) % 100
|
||||
i = self.lidar_render
|
||||
if i < 2*len(self.lidar):
|
||||
l = self.lidar[i] if i < len(self.lidar) else self.lidar[len(self.lidar)-i-1]
|
||||
self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
|
||||
|
||||
for obj in self.drawlist:
|
||||
for f in obj.fixtures:
|
||||
trans = f.body.transform
|
||||
if type(f.shape) is circleShape:
|
||||
t = rendering.Transform(translation=trans*f.shape.pos)
|
||||
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
|
||||
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
|
||||
else:
|
||||
path = [trans*v for v in f.shape.vertices]
|
||||
self.viewer.draw_polygon(path, color=obj.color1)
|
||||
path.append(path[0])
|
||||
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
|
||||
|
||||
flagy1 = TERRAIN_HEIGHT
|
||||
flagy2 = flagy1 + 50/SCALE
|
||||
x = TERRAIN_STEP*3
|
||||
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
|
||||
f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
|
||||
self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
|
||||
self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
|
||||
|
||||
return self.viewer.render(return_rgb_array = mode=='rgb_array')
|
||||
|
||||
def close(self):
|
||||
if self.viewer is not None:
|
||||
self.viewer.close()
|
||||
self.viewer = None
|
||||
|
||||
class BipedalWalkerHardcore(BipedalWalker):
|
||||
hardcore = True
|
||||
|
||||
if __name__=="__main__":
|
||||
# Heuristic: suboptimal, has no notion of balance.
|
||||
env = BipedalWalker()
|
||||
env.reset()
|
||||
steps = 0
|
||||
total_reward = 0
|
||||
a = np.array([0.0, 0.0, 0.0, 0.0])
|
||||
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1,2,3
|
||||
SPEED = 0.29 # Will fall forward at higher speed
|
||||
state = STAY_ON_ONE_LEG
|
||||
moving_leg = 0
|
||||
supporting_leg = 1 - moving_leg
|
||||
SUPPORT_KNEE_ANGLE = +0.1
|
||||
supporting_knee_angle = SUPPORT_KNEE_ANGLE
|
||||
while True:
|
||||
s, r, done, info = env.step(a)
|
||||
total_reward += r
|
||||
if steps % 20 == 0 or done:
|
||||
print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
|
||||
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
|
||||
print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4] ]))
|
||||
print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9] ]))
|
||||
print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]]))
|
||||
steps += 1
|
||||
|
||||
contact0 = s[8]
|
||||
contact1 = s[13]
|
||||
moving_s_base = 4 + 5*moving_leg
|
||||
supporting_s_base = 4 + 5*supporting_leg
|
||||
|
||||
hip_targ = [None,None] # -0.8 .. +1.1
|
||||
knee_targ = [None,None] # -0.6 .. +0.9
|
||||
hip_todo = [0.0, 0.0]
|
||||
knee_todo = [0.0, 0.0]
|
||||
|
||||
if state==STAY_ON_ONE_LEG:
|
||||
hip_targ[moving_leg] = 1.1
|
||||
knee_targ[moving_leg] = -0.6
|
||||
supporting_knee_angle += 0.03
|
||||
if s[2] > SPEED: supporting_knee_angle += 0.03
|
||||
supporting_knee_angle = min( supporting_knee_angle, SUPPORT_KNEE_ANGLE )
|
||||
knee_targ[supporting_leg] = supporting_knee_angle
|
||||
if s[supporting_s_base+0] < 0.10: # supporting leg is behind
|
||||
state = PUT_OTHER_DOWN
|
||||
if state==PUT_OTHER_DOWN:
|
||||
hip_targ[moving_leg] = +0.1
|
||||
knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
|
||||
knee_targ[supporting_leg] = supporting_knee_angle
|
||||
if s[moving_s_base+4]:
|
||||
state = PUSH_OFF
|
||||
supporting_knee_angle = min( s[moving_s_base+2], SUPPORT_KNEE_ANGLE )
|
||||
if state==PUSH_OFF:
|
||||
knee_targ[moving_leg] = supporting_knee_angle
|
||||
knee_targ[supporting_leg] = +1.0
|
||||
if s[supporting_s_base+2] > 0.88 or s[2] > 1.2*SPEED:
|
||||
state = STAY_ON_ONE_LEG
|
||||
moving_leg = 1 - moving_leg
|
||||
supporting_leg = 1 - moving_leg
|
||||
|
||||
if hip_targ[0]: hip_todo[0] = 0.9*(hip_targ[0] - s[4]) - 0.25*s[5]
|
||||
if hip_targ[1]: hip_todo[1] = 0.9*(hip_targ[1] - s[9]) - 0.25*s[10]
|
||||
if knee_targ[0]: knee_todo[0] = 4.0*(knee_targ[0] - s[6]) - 0.25*s[7]
|
||||
if knee_targ[1]: knee_todo[1] = 4.0*(knee_targ[1] - s[11]) - 0.25*s[12]
|
||||
|
||||
hip_todo[0] -= 0.9*(0-s[0]) - 1.5*s[1] # PID to keep head straight
|
||||
hip_todo[1] -= 0.9*(0-s[0]) - 1.5*s[1]
|
||||
knee_todo[0] -= 15.0*s[3] # vertical speed, to damp oscillations
|
||||
knee_todo[1] -= 15.0*s[3]
|
||||
|
||||
a[0] = hip_todo[0]
|
||||
a[1] = knee_todo[0]
|
||||
a[2] = hip_todo[1]
|
||||
a[3] = knee_todo[1]
|
||||
a = np.clip(0.5*a, -1.0, 1.0)
|
||||
|
||||
env.render()
|
||||
if done: break
|
@ -0,0 +1,244 @@
|
||||
import numpy as np
|
||||
import math
|
||||
import Box2D
|
||||
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
|
||||
|
||||
# Top-down car dynamics simulation.
|
||||
#
|
||||
# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
|
||||
# This simulation is a bit more detailed, with wheel rotation.
|
||||
#
|
||||
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
|
||||
|
||||
SIZE = 0.02
|
||||
ENGINE_POWER = 100000000*SIZE*SIZE
|
||||
WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
|
||||
FRICTION_LIMIT = 1000000*SIZE*SIZE # friction ~= mass ~= size^2 (calculated implicitly using density)
|
||||
WHEEL_R = 27
|
||||
WHEEL_W = 14
|
||||
WHEELPOS = [
|
||||
(-55,+80), (+55,+80),
|
||||
(-55,-82), (+55,-82)
|
||||
]
|
||||
HULL_POLY1 =[
|
||||
(-60,+130), (+60,+130),
|
||||
(+60,+110), (-60,+110)
|
||||
]
|
||||
HULL_POLY2 =[
|
||||
(-15,+120), (+15,+120),
|
||||
(+20, +20), (-20, 20)
|
||||
]
|
||||
HULL_POLY3 =[
|
||||
(+25, +20),
|
||||
(+50, -10),
|
||||
(+50, -40),
|
||||
(+20, -90),
|
||||
(-20, -90),
|
||||
(-50, -40),
|
||||
(-50, -10),
|
||||
(-25, +20)
|
||||
]
|
||||
HULL_POLY4 =[
|
||||
(-50,-120), (+50,-120),
|
||||
(+50,-90), (-50,-90)
|
||||
]
|
||||
WHEEL_COLOR = (0.0,0.0,0.0)
|
||||
WHEEL_WHITE = (0.3,0.3,0.3)
|
||||
MUD_COLOR = (0.4,0.4,0.0)
|
||||
|
||||
class Car:
|
||||
def __init__(self, world, init_angle, init_x, init_y):
|
||||
self.world = world
|
||||
self.hull = self.world.CreateDynamicBody(
|
||||
position = (init_x, init_y),
|
||||
angle = init_angle,
|
||||
fixtures = [
|
||||
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0),
|
||||
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0),
|
||||
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0),
|
||||
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0)
|
||||
]
|
||||
)
|
||||
self.hull.color = (0.8,0.0,0.0)
|
||||
self.wheels = []
|
||||
self.fuel_spent = 0.0
|
||||
WHEEL_POLY = [
|
||||
(-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R),
|
||||
(+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R)
|
||||
]
|
||||
for wx,wy in WHEELPOS:
|
||||
front_k = 1.0 if wy > 0 else 1.0
|
||||
w = self.world.CreateDynamicBody(
|
||||
position = (init_x+wx*SIZE, init_y+wy*SIZE),
|
||||
angle = init_angle,
|
||||
fixtures = fixtureDef(
|
||||
shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]),
|
||||
density=0.1,
|
||||
categoryBits=0x0020,
|
||||
maskBits=0x001,
|
||||
restitution=0.0)
|
||||
)
|
||||
w.wheel_rad = front_k*WHEEL_R*SIZE
|
||||
w.color = WHEEL_COLOR
|
||||
w.gas = 0.0
|
||||
w.brake = 0.0
|
||||
w.steer = 0.0
|
||||
w.phase = 0.0 # wheel angle
|
||||
w.omega = 0.0 # angular velocity
|
||||
w.skid_start = None
|
||||
w.skid_particle = None
|
||||
rjd = revoluteJointDef(
|
||||
bodyA=self.hull,
|
||||
bodyB=w,
|
||||
localAnchorA=(wx*SIZE,wy*SIZE),
|
||||
localAnchorB=(0,0),
|
||||
enableMotor=True,
|
||||
enableLimit=True,
|
||||
maxMotorTorque=180*900*SIZE*SIZE,
|
||||
motorSpeed = 0,
|
||||
lowerAngle = -0.4,
|
||||
upperAngle = +0.4,
|
||||
)
|
||||
w.joint = self.world.CreateJoint(rjd)
|
||||
w.tiles = set()
|
||||
w.userData = w
|
||||
self.wheels.append(w)
|
||||
self.drawlist = self.wheels + [self.hull]
|
||||
self.particles = []
|
||||
|
||||
def gas(self, gas):
|
||||
'control: rear wheel drive'
|
||||
gas = np.clip(gas, 0, 1)
|
||||
for w in self.wheels[2:4]:
|
||||
diff = gas - w.gas
|
||||
if diff > 0.1: diff = 0.1 # gradually increase, but stop immediately
|
||||
w.gas += diff
|
||||
|
||||
def brake(self, b):
|
||||
'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation'
|
||||
for w in self.wheels:
|
||||
w.brake = b
|
||||
|
||||
def steer(self, s):
|
||||
'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position'
|
||||
self.wheels[0].steer = s
|
||||
self.wheels[1].steer = s
|
||||
|
||||
def step(self, dt):
|
||||
for w in self.wheels:
|
||||
# Steer each wheel
|
||||
dir = np.sign(w.steer - w.joint.angle)
|
||||
val = abs(w.steer - w.joint.angle)
|
||||
w.joint.motorSpeed = dir*min(50.0*val, 3.0)
|
||||
|
||||
# Position => friction_limit
|
||||
grass = True
|
||||
friction_limit = FRICTION_LIMIT*0.6 # Grass friction if no tile
|
||||
for tile in w.tiles:
|
||||
friction_limit = max(friction_limit, FRICTION_LIMIT*tile.road_friction)
|
||||
grass = False
|
||||
|
||||
# Force
|
||||
forw = w.GetWorldVector( (0,1) )
|
||||
side = w.GetWorldVector( (1,0) )
|
||||
v = w.linearVelocity
|
||||
vf = forw[0]*v[0] + forw[1]*v[1] # forward speed
|
||||
vs = side[0]*v[0] + side[1]*v[1] # side speed
|
||||
|
||||
# WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
|
||||
# WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
|
||||
# domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
|
||||
w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0) # small constant to avoid dividing by zero
|
||||
self.fuel_spent += dt*ENGINE_POWER*w.gas
|
||||
|
||||
if w.brake >= 0.9:
|
||||
w.omega = 0
|
||||
elif w.brake > 0:
|
||||
BRAKE_FORCE = 15 # radians per second
|
||||
dir = -np.sign(w.omega)
|
||||
val = BRAKE_FORCE*w.brake
|
||||
if abs(val) > abs(w.omega): val = abs(w.omega) # low speed => same as = 0
|
||||
w.omega += dir*val
|
||||
w.phase += w.omega*dt
|
||||
|
||||
vr = w.omega*w.wheel_rad # rotating wheel speed
|
||||
f_force = -vf + vr # force direction is direction of speed difference
|
||||
p_force = -vs
|
||||
|
||||
# The physically correct behaviour is to always apply friction_limit until the speeds are equal.
|
||||
# But dt is finite, so that would lead to oscillations if the difference is already near zero.
|
||||
f_force *= 205000*SIZE*SIZE # Random coefficient to cut oscillations in a few steps (has no effect on friction_limit)
|
||||
p_force *= 205000*SIZE*SIZE
|
||||
force = np.sqrt(np.square(f_force) + np.square(p_force))
|
||||
|
||||
# Skid trace
|
||||
if abs(force) > 2.0*friction_limit:
|
||||
if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30:
|
||||
w.skid_particle.poly.append( (w.position[0], w.position[1]) )
|
||||
elif w.skid_start is None:
|
||||
w.skid_start = w.position
|
||||
else:
|
||||
w.skid_particle = self._create_particle( w.skid_start, w.position, grass )
|
||||
w.skid_start = None
|
||||
else:
|
||||
w.skid_start = None
|
||||
w.skid_particle = None
|
||||
|
||||
if abs(force) > friction_limit:
|
||||
f_force /= force
|
||||
p_force /= force
|
||||
force = friction_limit # Correct physics here
|
||||
f_force *= force
|
||||
p_force *= force
|
||||
|
||||
w.omega -= dt*f_force*w.wheel_rad/WHEEL_MOMENT_OF_INERTIA
|
||||
|
||||
w.ApplyForceToCenter( (
|
||||
p_force*side[0] + f_force*forw[0],
|
||||
p_force*side[1] + f_force*forw[1]), True )
|
||||
|
||||
def draw(self, viewer, draw_particles=True):
|
||||
if draw_particles:
|
||||
for p in self.particles:
|
||||
viewer.draw_polyline(p.poly, color=p.color, linewidth=5)
|
||||
for obj in self.drawlist:
|
||||
for f in obj.fixtures:
|
||||
trans = f.body.transform
|
||||
path = [trans*v for v in f.shape.vertices]
|
||||
viewer.draw_polygon(path, color=obj.color)
|
||||
if "phase" not in obj.__dict__: continue
|
||||
a1 = obj.phase
|
||||
a2 = obj.phase + 1.2 # radians
|
||||
s1 = math.sin(a1)
|
||||
s2 = math.sin(a2)
|
||||
c1 = math.cos(a1)
|
||||
c2 = math.cos(a2)
|
||||
if s1>0 and s2>0: continue
|
||||
if s1>0: c1 = np.sign(c1)
|
||||
if s2>0: c2 = np.sign(c2)
|
||||
white_poly = [
|
||||
(-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
|
||||
(+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
|
||||
]
|
||||
viewer.draw_polygon([trans*v for v in white_poly], color=WHEEL_WHITE)
|
||||
|
||||
def _create_particle(self, point1, point2, grass):
|
||||
class Particle:
|
||||
pass
|
||||
p = Particle()
|
||||
p.color = WHEEL_COLOR if not grass else MUD_COLOR
|
||||
p.ttl = 1
|
||||
p.poly = [(point1[0],point1[1]), (point2[0],point2[1])]
|
||||
p.grass = grass
|
||||
self.particles.append(p)
|
||||
while len(self.particles) > 30:
|
||||
self.particles.pop(0)
|
||||
return p
|
||||
|
||||
def destroy(self):
|
||||
self.world.DestroyBody(self.hull)
|
||||
self.hull = None
|
||||
for w in self.wheels:
|
||||
self.world.DestroyBody(w)
|
||||
self.wheels = []
|
||||
|
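# A standalone dynamics sketch (illustrative only; needs Box2D installed): the
# Car can be driven without the racing environment by stepping it inside a
# plain top-down world with no gravity.
if __name__ == "__main__":
    world = Box2D.b2World((0, 0))
    car = Car(world, init_angle=0.0, init_x=0.0, init_y=0.0)
    car.gas(1.0)                          # rear-wheel drive, see gas() above
    for _ in range(50):
        car.step(1.0/50)                  # advance wheel forces and rotation
        world.Step(1.0/50, 6*30, 2*30)    # advance the physics world itself
    print("fuel spent:", car.fuel_spent)
    car.destroy()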
@ -0,0 +1,498 @@
|
||||
import sys, math
|
||||
import numpy as np
|
||||
|
||||
import Box2D
|
||||
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
|
||||
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.envs.box2d.car_dynamics import Car
|
||||
from gym.utils import colorize, seeding, EzPickle
|
||||
|
||||
import pyglet
|
||||
from pyglet import gl
|
||||
|
||||
# Easiest continuous control task to learn from pixels, a top-down racing environment.
|
||||
# Discrete control is reasonable in this environment as well, on/off discretisation is
|
||||
# fine.
|
||||
#
|
||||
# State consists of STATE_W x STATE_H pixels.
|
||||
#
|
||||
# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
|
||||
# the total number of tiles in track. For example, if you have finished in 732 frames,
|
||||
# your reward is 1000 - 0.1*732 = 926.8 points.
|
||||
#
|
||||
# Game is solved when agent consistently gets 900+ points. Track is random every episode.
|
||||
#
|
||||
# Episode finishes when all tiles are visited. The car can also go outside of PLAYFIELD, that
|
||||
# is, far off the track, in which case it will get -100 and die.
|
||||
#
|
||||
# Some indicators are shown at the bottom of the window and in the state RGB buffer. From
|
||||
# left to right: true speed, four ABS sensors, steering wheel position, gyroscope.
|
||||
#
|
||||
# To play yourself (it's rather fast for humans), type:
|
||||
#
|
||||
# python gym/envs/box2d/car_racing.py
|
||||
#
|
||||
# Remember it's a powerful rear-wheel drive car; don't press the accelerator and turn at the
|
||||
# same time.
|
||||
#
|
||||
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
|
||||
|
||||
STATE_W = 96 # less than Atari 160x192
|
||||
STATE_H = 96
|
||||
VIDEO_W = 600
|
||||
VIDEO_H = 400
|
||||
WINDOW_W = 1200
|
||||
WINDOW_H = 1000
|
||||
|
||||
SCALE = 6.0 # Track scale
|
||||
TRACK_RAD = 900/SCALE # Track is heavily morphed circle with this radius
|
||||
PLAYFIELD = 2000/SCALE # Game over boundary
|
||||
FPS = 50
|
||||
ZOOM = 2.7 # Camera zoom
|
||||
ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom)
|
||||
|
||||
|
||||
TRACK_DETAIL_STEP = 21/SCALE
|
||||
TRACK_TURN_RATE = 0.31
|
||||
TRACK_WIDTH = 40/SCALE
|
||||
BORDER = 8/SCALE
|
||||
BORDER_MIN_COUNT = 4
|
||||
|
||||
ROAD_COLOR = [0.4, 0.4, 0.4]
|
||||
|
||||
class FrictionDetector(contactListener):
|
||||
def __init__(self, env):
|
||||
contactListener.__init__(self)
|
||||
self.env = env
|
||||
def BeginContact(self, contact):
|
||||
self._contact(contact, True)
|
||||
def EndContact(self, contact):
|
||||
self._contact(contact, False)
|
||||
def _contact(self, contact, begin):
|
||||
tile = None
|
||||
obj = None
|
||||
u1 = contact.fixtureA.body.userData
|
||||
u2 = contact.fixtureB.body.userData
|
||||
if u1 and "road_friction" in u1.__dict__:
|
||||
tile = u1
|
||||
obj = u2
|
||||
if u2 and "road_friction" in u2.__dict__:
|
||||
tile = u2
|
||||
obj = u1
|
||||
if not tile: return
|
||||
|
||||
tile.color[0] = ROAD_COLOR[0]
|
||||
tile.color[1] = ROAD_COLOR[1]
|
||||
tile.color[2] = ROAD_COLOR[2]
|
||||
if not obj or "tiles" not in obj.__dict__: return
|
||||
if begin:
|
||||
obj.tiles.add(tile)
|
||||
#print tile.road_friction, "ADD", len(obj.tiles)
|
||||
if not tile.road_visited:
|
||||
tile.road_visited = True
|
||||
self.env.reward += 1000.0/len(self.env.track)
|
||||
self.env.tile_visited_count += 1
|
||||
else:
|
||||
obj.tiles.remove(tile)
|
||||
#print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)
|
||||
|
||||
class CarRacing(gym.Env, EzPickle):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array', 'state_pixels'],
|
||||
'video.frames_per_second' : FPS
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
EzPickle.__init__(self)
|
||||
self.seed()
|
||||
self.contactListener_keepref = FrictionDetector(self)
|
||||
self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
|
||||
self.viewer = None
|
||||
self.invisible_state_window = None
|
||||
self.invisible_video_window = None
|
||||
self.road = None
|
||||
self.car = None
|
||||
self.reward = 0.0
|
||||
self.prev_reward = 0.0
|
||||
|
||||
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32) # steer, gas, brake
|
||||
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def _destroy(self):
|
||||
if not self.road: return
|
||||
for t in self.road:
|
||||
self.world.DestroyBody(t)
|
||||
self.road = []
|
||||
self.car.destroy()
|
||||
|
||||
def _create_track(self):
|
||||
CHECKPOINTS = 12
|
||||
|
||||
# Create checkpoints
|
||||
checkpoints = []
|
||||
for c in range(CHECKPOINTS):
|
||||
alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
|
||||
rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
|
||||
if c==0:
|
||||
alpha = 0
|
||||
rad = 1.5*TRACK_RAD
|
||||
if c==CHECKPOINTS-1:
|
||||
alpha = 2*math.pi*c/CHECKPOINTS
|
||||
self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
|
||||
rad = 1.5*TRACK_RAD
|
||||
checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )
|
||||
|
||||
#print "\n".join(str(h) for h in checkpoints)
|
||||
#self.road_poly = [ ( # uncomment this to see checkpoints
|
||||
# [ (tx,ty) for a,tx,ty in checkpoints ],
|
||||
# (0.7,0.7,0.9) ) ]
|
||||
self.road = []
|
||||
|
||||
# Go from one checkpoint to another to create track
|
||||
x, y, beta = 1.5*TRACK_RAD, 0, 0
|
||||
dest_i = 0
|
||||
laps = 0
|
||||
track = []
|
||||
no_freeze = 2500
|
||||
visited_other_side = False
|
||||
while 1:
|
||||
alpha = math.atan2(y, x)
|
||||
if visited_other_side and alpha > 0:
|
||||
laps += 1
|
||||
visited_other_side = False
|
||||
if alpha < 0:
|
||||
visited_other_side = True
|
||||
alpha += 2*math.pi
|
||||
while True: # Find destination from checkpoints
|
||||
failed = True
|
||||
while True:
|
||||
dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
|
||||
if alpha <= dest_alpha:
|
||||
failed = False
|
||||
break
|
||||
dest_i += 1
|
||||
if dest_i % len(checkpoints) == 0: break
|
||||
if not failed: break
|
||||
alpha -= 2*math.pi
|
||||
continue
|
||||
r1x = math.cos(beta)
|
||||
r1y = math.sin(beta)
|
||||
p1x = -r1y
|
||||
p1y = r1x
|
||||
dest_dx = dest_x - x # vector towards destination
|
||||
dest_dy = dest_y - y
|
||||
proj = r1x*dest_dx + r1y*dest_dy # destination vector projected on rad
|
||||
while beta - alpha > 1.5*math.pi: beta -= 2*math.pi
|
||||
while beta - alpha < -1.5*math.pi: beta += 2*math.pi
|
||||
prev_beta = beta
|
||||
proj *= SCALE
|
||||
if proj > 0.3: beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
|
||||
if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001*proj))
|
||||
x += p1x*TRACK_DETAIL_STEP
|
||||
y += p1y*TRACK_DETAIL_STEP
|
||||
track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
|
||||
if laps > 4: break
|
||||
no_freeze -= 1
|
||||
if no_freeze==0: break
|
||||
#print "\n".join([str(t) for t in enumerate(track)])
|
||||
|
||||
# Find closed loop range i1..i2, first loop should be ignored, second is OK
|
||||
i1, i2 = -1, -1
|
||||
i = len(track)
|
||||
while True:
|
||||
i -= 1
|
||||
if i==0: return False # Failed
|
||||
pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
|
||||
if pass_through_start and i2==-1:
|
||||
i2 = i
|
||||
elif pass_through_start and i1==-1:
|
||||
i1 = i
|
||||
break
|
||||
print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
|
||||
assert i1!=-1
|
||||
assert i2!=-1
|
||||
|
||||
track = track[i1:i2-1]
|
||||
|
||||
first_beta = track[0][1]
|
||||
first_perp_x = math.cos(first_beta)
|
||||
first_perp_y = math.sin(first_beta)
|
||||
# Length of perpendicular jump to put together head and tail
|
||||
well_glued_together = np.sqrt(
|
||||
np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
|
||||
np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
|
||||
if well_glued_together > TRACK_DETAIL_STEP:
|
||||
return False
|
||||
|
||||
# Red-white border on hard turns
|
||||
border = [False]*len(track)
|
||||
for i in range(len(track)):
|
||||
good = True
|
||||
oneside = 0
|
||||
for neg in range(BORDER_MIN_COUNT):
|
||||
beta1 = track[i-neg-0][1]
|
||||
beta2 = track[i-neg-1][1]
|
||||
good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
|
||||
oneside += np.sign(beta1 - beta2)
|
||||
good &= abs(oneside) == BORDER_MIN_COUNT
|
||||
border[i] = good
|
||||
for i in range(len(track)):
|
||||
for neg in range(BORDER_MIN_COUNT):
|
||||
border[i-neg] |= border[i]
|
||||
|
||||
# Create tiles
|
||||
for i in range(len(track)):
|
||||
alpha1, beta1, x1, y1 = track[i]
|
||||
alpha2, beta2, x2, y2 = track[i-1]
|
||||
road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
|
||||
road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
|
||||
road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
|
||||
road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
|
||||
t = self.world.CreateStaticBody( fixtures = fixtureDef(
|
||||
shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
|
||||
))
|
||||
t.userData = t
|
||||
c = 0.01*(i%3)
|
||||
t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
|
||||
t.road_visited = False
|
||||
t.road_friction = 1.0
|
||||
t.fixtures[0].sensor = True
|
||||
self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
|
||||
self.road.append(t)
|
||||
if border[i]:
|
||||
side = np.sign(beta2 - beta1)
|
||||
b1_l = (x1 + side* TRACK_WIDTH *math.cos(beta1), y1 + side* TRACK_WIDTH *math.sin(beta1))
|
||||
b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
|
||||
b2_l = (x2 + side* TRACK_WIDTH *math.cos(beta2), y2 + side* TRACK_WIDTH *math.sin(beta2))
|
||||
b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
|
||||
self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
|
||||
self.track = track
|
||||
return True
|
||||
|
||||
def reset(self):
|
||||
self._destroy()
|
||||
self.reward = 0.0
|
||||
self.prev_reward = 0.0
|
||||
self.tile_visited_count = 0
|
||||
self.t = 0.0
|
||||
self.road_poly = []
|
||||
self.human_render = False
|
||||
|
||||
while True:
|
||||
success = self._create_track()
|
||||
if success: break
|
||||
print("retry to generate track (normal if there are not many of these messages)")
|
||||
self.car = Car(self.world, *self.track[0][1:4])
|
||||
|
||||
return self.step(None)[0]
|
||||
|
||||
def step(self, action):
|
||||
if action is not None:
|
||||
self.car.steer(-action[0])
|
||||
self.car.gas(action[1])
|
||||
self.car.brake(action[2])
|
||||
|
||||
self.car.step(1.0/FPS)
|
||||
self.world.Step(1.0/FPS, 6*30, 2*30)
|
||||
self.t += 1.0/FPS
|
||||
|
||||
self.state = self.render("state_pixels")
|
||||
|
||||
step_reward = 0
|
||||
done = False
|
||||
if action is not None: # First step without action, called from reset()
|
||||
self.reward -= 0.1
|
||||
# We actually don't want to count fuel spent, we want car to be faster.
|
||||
#self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
|
||||
self.car.fuel_spent = 0.0
|
||||
step_reward = self.reward - self.prev_reward
|
||||
self.prev_reward = self.reward
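# step_reward is the change in cumulative reward since the previous step, so the
# per-frame -0.1 penalty and any newly visited tile bonuses both show up here.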
|
||||
if self.tile_visited_count==len(self.track):
|
||||
done = True
|
||||
x, y = self.car.hull.position
|
||||
if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
|
||||
done = True
|
||||
step_reward = -100
|
||||
|
||||
return self.state, step_reward, done, {}
|
||||
|
||||
def render(self, mode='human'):
|
||||
if self.viewer is None:
|
||||
from gym.envs.classic_control import rendering
|
||||
self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
|
||||
self.score_label = pyglet.text.Label('0000', font_size=36,
|
||||
x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
|
||||
color=(255,255,255,255))
|
||||
self.transform = rendering.Transform()
|
||||
|
||||
if "t" not in self.__dict__: return # reset() not called yet
|
||||
|
||||
zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1) # Animate zoom first second
|
||||
zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W
|
||||
zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W
|
||||
scroll_x = self.car.hull.position[0]
|
||||
scroll_y = self.car.hull.position[1]
|
||||
angle = -self.car.hull.angle
|
||||
vel = self.car.hull.linearVelocity
|
||||
if np.linalg.norm(vel) > 0.5:
|
||||
angle = math.atan2(vel[0], vel[1])
|
||||
self.transform.set_scale(zoom, zoom)
|
||||
self.transform.set_translation(
|
||||
WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
|
||||
WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
|
||||
self.transform.set_rotation(angle)
|
||||
|
||||
self.car.draw(self.viewer, mode!="state_pixels")
|
||||
|
||||
arr = None
|
||||
win = self.viewer.window
|
||||
if mode != 'state_pixels':
|
||||
win.switch_to()
|
||||
win.dispatch_events()
|
||||
if mode=="rgb_array" or mode=="state_pixels":
|
||||
win.clear()
|
||||
t = self.transform
|
||||
if mode=='rgb_array':
|
||||
VP_W = VIDEO_W
|
||||
VP_H = VIDEO_H
|
||||
else:
|
||||
VP_W = STATE_W
|
||||
VP_H = STATE_H
|
||||
gl.glViewport(0, 0, VP_W, VP_H)
|
||||
t.enable()
|
||||
self.render_road()
|
||||
for geom in self.viewer.onetime_geoms:
|
||||
geom.render()
|
||||
t.disable()
|
||||
self.render_indicators(WINDOW_W, WINDOW_H) # TODO: find why 2x needed, wtf
|
||||
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
|
||||
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
|
||||
arr = arr.reshape(VP_H, VP_W, 4)
|
||||
arr = arr[::-1, :, 0:3]
|
||||
|
||||
if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video.
|
||||
win.flip()
|
||||
|
||||
if mode=='human':
|
||||
self.human_render = True
|
||||
win.clear()
|
||||
t = self.transform
|
||||
gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
|
||||
t.enable()
|
||||
self.render_road()
|
||||
for geom in self.viewer.onetime_geoms:
|
||||
geom.render()
|
||||
t.disable()
|
||||
self.render_indicators(WINDOW_W, WINDOW_H)
|
||||
win.flip()
|
||||
|
||||
self.viewer.onetime_geoms = []
|
||||
return arr
|
||||
|
||||
def close(self):
|
||||
if self.viewer is not None:
|
||||
self.viewer.close()
|
||||
self.viewer = None
|
||||
|
||||
def render_road(self):
|
||||
gl.glBegin(gl.GL_QUADS)
|
||||
gl.glColor4f(0.4, 0.8, 0.4, 1.0)
|
||||
gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
|
||||
gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
|
||||
gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
|
||||
gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
|
||||
gl.glColor4f(0.4, 0.9, 0.4, 1.0)
|
||||
k = PLAYFIELD/20.0
|
||||
for x in range(-20, 20, 2):
|
||||
for y in range(-20, 20, 2):
|
||||
gl.glVertex3f(k*x + k, k*y + 0, 0)
|
||||
gl.glVertex3f(k*x + 0, k*y + 0, 0)
|
||||
gl.glVertex3f(k*x + 0, k*y + k, 0)
|
||||
gl.glVertex3f(k*x + k, k*y + k, 0)
|
||||
for poly, color in self.road_poly:
|
||||
gl.glColor4f(color[0], color[1], color[2], 1)
|
||||
for p in poly:
|
||||
gl.glVertex3f(p[0], p[1], 0)
|
||||
gl.glEnd()
|
||||
|
||||
def render_indicators(self, W, H):
|
||||
gl.glBegin(gl.GL_QUADS)
|
||||
s = W/40.0
|
||||
h = H/40.0
|
||||
gl.glColor4f(0,0,0,1)
|
||||
gl.glVertex3f(W, 0, 0)
|
||||
gl.glVertex3f(W, 5*h, 0)
|
||||
gl.glVertex3f(0, 5*h, 0)
|
||||
gl.glVertex3f(0, 0, 0)
|
||||
def vertical_ind(place, val, color):
|
||||
gl.glColor4f(color[0], color[1], color[2], 1)
|
||||
gl.glVertex3f((place+0)*s, h + h*val, 0)
|
||||
gl.glVertex3f((place+1)*s, h + h*val, 0)
|
||||
gl.glVertex3f((place+1)*s, h, 0)
|
||||
gl.glVertex3f((place+0)*s, h, 0)
|
||||
def horiz_ind(place, val, color):
|
||||
gl.glColor4f(color[0], color[1], color[2], 1)
|
||||
gl.glVertex3f((place+0)*s, 4*h , 0)
|
||||
gl.glVertex3f((place+val)*s, 4*h, 0)
|
||||
gl.glVertex3f((place+val)*s, 2*h, 0)
|
||||
gl.glVertex3f((place+0)*s, 2*h, 0)
|
||||
true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
|
||||
vertical_ind(5, 0.02*true_speed, (1,1,1))
|
||||
vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
|
||||
vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
|
||||
vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
|
||||
vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
|
||||
horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
|
||||
horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
|
||||
gl.glEnd()
|
||||
self.score_label.text = "%04i" % self.reward
|
||||
self.score_label.draw()
|
||||
|
||||
|
||||
if __name__=="__main__":
|
||||
from pyglet.window import key
|
||||
a = np.array( [0.0, 0.0, 0.0] )
|
||||
def key_press(k, mod):
|
||||
global restart
|
||||
if k==0xff0d: restart = True
|
||||
if k==key.LEFT: a[0] = -1.0
|
||||
if k==key.RIGHT: a[0] = +1.0
|
||||
if k==key.UP: a[1] = +1.0
|
||||
if k==key.DOWN: a[2] = +0.8 # set 1.0 for the wheels to lock (zero rotation)
|
||||
def key_release(k, mod):
|
||||
if k==key.LEFT and a[0]==-1.0: a[0] = 0
|
||||
if k==key.RIGHT and a[0]==+1.0: a[0] = 0
|
||||
if k==key.UP: a[1] = 0
|
||||
if k==key.DOWN: a[2] = 0
|
||||
env = CarRacing()
|
||||
env.render()
|
||||
record_video = False
|
||||
if record_video:
|
||||
env.monitor.start('/tmp/video-test', force=True)
|
||||
env.viewer.window.on_key_press = key_press
|
||||
env.viewer.window.on_key_release = key_release
|
||||
while True:
|
||||
env.reset()
|
||||
total_reward = 0.0
|
||||
steps = 0
|
||||
restart = False
|
||||
while True:
|
||||
s, r, done, info = env.step(a)
|
||||
total_reward += r
|
||||
if steps % 200 == 0 or done:
|
||||
print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
|
||||
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
|
||||
#import matplotlib.pyplot as plt
|
||||
#plt.imshow(s)
|
||||
#plt.savefig("test.jpeg")
|
||||
steps += 1
|
||||
if not record_video: # Faster, but you can also call env.render() every time to play in the full window.
|
||||
env.render()
|
||||
if done or restart: break
|
||||
env.close()
|
@ -0,0 +1,420 @@
|
||||
import sys, math
|
||||
import numpy as np
|
||||
|
||||
import Box2D
|
||||
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
|
||||
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding, EzPickle
|
||||
|
||||
# Rocket trajectory optimization is a classic topic in Optimal Control.
|
||||
#
|
||||
# According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
|
||||
# turn it off. That's the reason this environment is OK to have discrete actions (engine on or off).
|
||||
#
|
||||
# Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
|
||||
# Reward for moving from the top of the screen to the landing pad with zero speed is about 100..140 points.
|
||||
# If the lander moves away from the landing pad, it loses that reward. The episode finishes if the lander crashes or
|
||||
# comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
|
||||
# engine is -0.3 points each frame. Solved is 200 points.
|
||||
#
|
||||
# Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
|
||||
# on its first attempt. Please see source code for details.
|
||||
#
|
||||
# To see heuristic landing, run:
|
||||
#
|
||||
# python gym/envs/box2d/lunar_lander.py
|
||||
#
|
||||
# To play yourself, run:
|
||||
#
|
||||
# python examples/agents/keyboard_agent.py LunarLander-v2
|
||||
#
|
||||
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
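# Quick reference (illustrative, matching the discrete action encoding used below):
#   0 = do nothing, 1 = fire left orientation engine,
#   2 = fire main engine, 3 = fire right orientation engine.
# A minimal random rollout, run from another module, would look like this sketch:
#   env = LunarLander(); s = env.reset()
#   s, r, done, info = env.step(env.action_space.sample())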
|
||||
|
||||
FPS = 50
|
||||
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
|
||||
|
||||
MAIN_ENGINE_POWER = 13.0
|
||||
SIDE_ENGINE_POWER = 0.6
|
||||
|
||||
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
|
||||
|
||||
LANDER_POLY =[
|
||||
(-14,+17), (-17,0), (-17,-10),
|
||||
(+17,-10), (+17,0), (+14,+17)
|
||||
]
|
||||
LEG_AWAY = 20
|
||||
LEG_DOWN = 18
|
||||
LEG_W, LEG_H = 2, 8
|
||||
LEG_SPRING_TORQUE = 40
|
||||
|
||||
SIDE_ENGINE_HEIGHT = 14.0
|
||||
SIDE_ENGINE_AWAY = 12.0
|
||||
|
||||
VIEWPORT_W = 600
|
||||
VIEWPORT_H = 400
|
||||
|
||||
class ContactDetector(contactListener):
|
||||
def __init__(self, env):
|
||||
contactListener.__init__(self)
|
||||
self.env = env
|
||||
def BeginContact(self, contact):
|
||||
if self.env.lander==contact.fixtureA.body or self.env.lander==contact.fixtureB.body:
|
||||
self.env.game_over = True
|
||||
for i in range(2):
|
||||
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
|
||||
self.env.legs[i].ground_contact = True
|
||||
def EndContact(self, contact):
|
||||
for i in range(2):
|
||||
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
|
||||
self.env.legs[i].ground_contact = False
|
||||
|
||||
class LunarLander(gym.Env, EzPickle):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : FPS
|
||||
}
|
||||
|
||||
continuous = False
|
||||
|
||||
def __init__(self):
|
||||
EzPickle.__init__(self)
|
||||
self.seed()
|
||||
self.viewer = None
|
||||
|
||||
self.world = Box2D.b2World()
|
||||
self.moon = None
|
||||
self.lander = None
|
||||
self.particles = []
|
||||
|
||||
self.prev_reward = None
|
||||
|
||||
# useful range is -1 .. +1, but spikes can be higher
|
||||
self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)
|
||||
|
||||
if self.continuous:
|
||||
# Action is two floats [main engine, left-right engines].
|
||||
# Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
|
||||
# Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
|
||||
self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32)
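# Worked examples of this encoding (illustrative): action = [0.0, 0.0] leaves both
# engines off; [+1.0, 0.0] runs the main engine at 100% power; [0.2, -0.8] runs the
# main engine at 60% power while firing the left orientation engine.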
|
||||
else:
|
||||
# Nop, fire left engine, main engine, right engine
|
||||
self.action_space = spaces.Discrete(4)
|
||||
|
||||
self.reset()
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def _destroy(self):
|
||||
if not self.moon: return
|
||||
self.world.contactListener = None
|
||||
self._clean_particles(True)
|
||||
self.world.DestroyBody(self.moon)
|
||||
self.moon = None
|
||||
self.world.DestroyBody(self.lander)
|
||||
self.lander = None
|
||||
self.world.DestroyBody(self.legs[0])
|
||||
self.world.DestroyBody(self.legs[1])
|
||||
|
||||
def reset(self):
|
||||
self._destroy()
|
||||
self.world.contactListener_keepref = ContactDetector(self)
|
||||
self.world.contactListener = self.world.contactListener_keepref
|
||||
self.game_over = False
|
||||
self.prev_shaping = None
|
||||
|
||||
W = VIEWPORT_W/SCALE
|
||||
H = VIEWPORT_H/SCALE
|
||||
|
||||
# terrain
|
||||
CHUNKS = 11
|
||||
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
|
||||
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
|
||||
self.helipad_x1 = chunk_x[CHUNKS//2-1]
|
||||
self.helipad_x2 = chunk_x[CHUNKS//2+1]
|
||||
self.helipad_y = H/4
|
||||
height[CHUNKS//2-2] = self.helipad_y
|
||||
height[CHUNKS//2-1] = self.helipad_y
|
||||
height[CHUNKS//2+0] = self.helipad_y
|
||||
height[CHUNKS//2+1] = self.helipad_y
|
||||
height[CHUNKS//2+2] = self.helipad_y
|
||||
smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]
|
||||
|
||||
self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) )
|
||||
self.sky_polys = []
|
||||
for i in range(CHUNKS-1):
|
||||
p1 = (chunk_x[i], smooth_y[i])
|
||||
p2 = (chunk_x[i+1], smooth_y[i+1])
|
||||
self.moon.CreateEdgeFixture(
|
||||
vertices=[p1,p2],
|
||||
density=0,
|
||||
friction=0.1)
|
||||
self.sky_polys.append( [p1, p2, (p2[0],H), (p1[0],H)] )
|
||||
|
||||
self.moon.color1 = (0.0,0.0,0.0)
|
||||
self.moon.color2 = (0.0,0.0,0.0)
|
||||
|
||||
initial_y = VIEWPORT_H/SCALE
|
||||
self.lander = self.world.CreateDynamicBody(
|
||||
position = (VIEWPORT_W/SCALE/2, initial_y),
|
||||
angle=0.0,
|
||||
fixtures = fixtureDef(
|
||||
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in LANDER_POLY ]),
|
||||
density=5.0,
|
||||
friction=0.1,
|
||||
categoryBits=0x0010,
|
||||
maskBits=0x001, # collide only with ground
|
||||
restitution=0.0) # 0.99 bouncy
|
||||
)
|
||||
self.lander.color1 = (0.5,0.4,0.9)
|
||||
self.lander.color2 = (0.3,0.3,0.5)
|
||||
self.lander.ApplyForceToCenter( (
|
||||
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
|
||||
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
|
||||
), True)
|
||||
|
||||
self.legs = []
|
||||
for i in [-1,+1]:
|
||||
leg = self.world.CreateDynamicBody(
|
||||
position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
|
||||
angle = (i*0.05),
|
||||
fixtures = fixtureDef(
|
||||
shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
|
||||
density=1.0,
|
||||
restitution=0.0,
|
||||
categoryBits=0x0020,
|
||||
maskBits=0x001)
|
||||
)
|
||||
leg.ground_contact = False
|
||||
leg.color1 = (0.5,0.4,0.9)
|
||||
leg.color2 = (0.3,0.3,0.5)
|
||||
rjd = revoluteJointDef(
|
||||
bodyA=self.lander,
|
||||
bodyB=leg,
|
||||
localAnchorA=(0, 0),
|
||||
localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE),
|
||||
enableMotor=True,
|
||||
enableLimit=True,
|
||||
maxMotorTorque=LEG_SPRING_TORQUE,
|
||||
motorSpeed=+0.3*i # low enough not to jump back into the sky
|
||||
)
|
||||
if i==-1:
|
||||
rjd.lowerAngle = +0.9 - 0.5 # The most esoteric numbers here: the angle range each leg is free to travel within
|
||||
rjd.upperAngle = +0.9
|
||||
else:
|
||||
rjd.lowerAngle = -0.9
|
||||
rjd.upperAngle = -0.9 + 0.5
|
||||
leg.joint = self.world.CreateJoint(rjd)
|
||||
self.legs.append(leg)
|
||||
|
||||
self.drawlist = [self.lander] + self.legs
|
||||
|
||||
return self.step(np.array([0,0]) if self.continuous else 0)[0]
|
||||
|
||||
def _create_particle(self, mass, x, y, ttl):
|
||||
p = self.world.CreateDynamicBody(
|
||||
position = (x,y),
|
||||
angle=0.0,
|
||||
fixtures = fixtureDef(
|
||||
shape=circleShape(radius=2/SCALE, pos=(0,0)),
|
||||
density=mass,
|
||||
friction=0.1,
|
||||
categoryBits=0x0100,
|
||||
maskBits=0x001, # collide only with ground
|
||||
restitution=0.3)
|
||||
)
|
||||
p.ttl = ttl
|
||||
self.particles.append(p)
|
||||
self._clean_particles(False)
|
||||
return p
|
||||
|
||||
def _clean_particles(self, all):
|
||||
while self.particles and (all or self.particles[0].ttl<0):
|
||||
self.world.DestroyBody(self.particles.pop(0))
|
||||
|
||||
def step(self, action):
|
||||
if self.continuous:
|
||||
action = np.clip(action, -1, +1).astype(np.float32)
|
||||
else:
|
||||
assert self.action_space.contains(action), "%r (%s) invalid " % (action, type(action))
|
||||
|
||||
# Engines
|
||||
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
|
||||
side = (-tip[1], tip[0])
|
||||
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
|
||||
|
||||
m_power = 0.0
|
||||
if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
|
||||
# Main engine
|
||||
if self.continuous:
|
||||
m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0
|
||||
assert m_power>=0.5 and m_power <= 1.0
|
||||
else:
|
||||
m_power = 1.0
|
||||
ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
|
||||
oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
|
||||
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
|
||||
p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate
|
||||
p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER*m_power, oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
|
||||
self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
|
||||
|
||||
s_power = 0.0
|
||||
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
|
||||
# Orientation engines
|
||||
if self.continuous:
|
||||
direction = np.sign(action[1])
|
||||
s_power = np.clip(np.abs(action[1]), 0.5,1.0)
|
||||
assert s_power>=0.5 and s_power <= 1.0
|
||||
else:
|
||||
direction = action-2
|
||||
s_power = 1.0
|
||||
ox = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
|
||||
oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
|
||||
impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
|
||||
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
|
||||
p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER*s_power, oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
|
||||
self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
|
||||
|
||||
self.world.Step(1.0/FPS, 6*30, 2*30)
|
||||
|
||||
pos = self.lander.position
|
||||
vel = self.lander.linearVelocity
|
||||
state = [
|
||||
(pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
|
||||
(pos.y - (self.helipad_y+LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2),
|
||||
vel.x*(VIEWPORT_W/SCALE/2)/FPS,
|
||||
vel.y*(VIEWPORT_H/SCALE/2)/FPS,
|
||||
self.lander.angle,
|
||||
20.0*self.lander.angularVelocity/FPS,
|
||||
1.0 if self.legs[0].ground_contact else 0.0,
|
||||
1.0 if self.legs[1].ground_contact else 0.0
|
||||
]
|
||||
assert len(state)==8
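# State layout (for reference, read off the assignments above):
#   state[0:2] -> x, y position relative to the landing pad (scaled)
#   state[2:4] -> x, y velocity (scaled by viewport size and FPS)
#   state[4:6] -> angle and scaled angular velocity
#   state[6:8] -> 1.0 if the corresponding leg touches the ground, else 0.0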
|
||||
|
||||
reward = 0
|
||||
shaping = \
|
||||
- 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
|
||||
- 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
|
||||
- 100*abs(state[4]) + 10*state[6] + 10*state[7] # And ten points for legs contact, the idea is if you
|
||||
# lose contact again after landing, you get negative reward
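# The agent receives the *difference* in shaping between consecutive steps
# (potential-based shaping), so moving toward the pad, slowing down, and staying
# upright are rewarded, and undoing any of these is penalised.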
|
||||
if self.prev_shaping is not None:
|
||||
reward = shaping - self.prev_shaping
|
||||
self.prev_shaping = shaping
|
||||
|
||||
reward -= m_power*0.30 # less fuel spent is better, about -30 for a heuristic landing
|
||||
reward -= s_power*0.03
|
||||
|
||||
done = False
|
||||
if self.game_over or abs(state[0]) >= 1.0:
|
||||
done = True
|
||||
reward = -100
|
||||
if not self.lander.awake:
|
||||
done = True
|
||||
reward = +100
|
||||
return np.array(state, dtype=np.float32), reward, done, {}
|
||||
|
||||
def render(self, mode='human'):
|
||||
from gym.envs.classic_control import rendering
|
||||
if self.viewer is None:
|
||||
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
|
||||
self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
|
||||
|
||||
for obj in self.particles:
|
||||
obj.ttl -= 0.15
|
||||
obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
|
||||
obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
|
||||
|
||||
self._clean_particles(False)
|
||||
|
||||
for p in self.sky_polys:
|
||||
self.viewer.draw_polygon(p, color=(0,0,0))
|
||||
|
||||
for obj in self.particles + self.drawlist:
|
||||
for f in obj.fixtures:
|
||||
trans = f.body.transform
|
||||
if type(f.shape) is circleShape:
|
||||
t = rendering.Transform(translation=trans*f.shape.pos)
|
||||
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
|
||||
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
|
||||
else:
|
||||
path = [trans*v for v in f.shape.vertices]
|
||||
self.viewer.draw_polygon(path, color=obj.color1)
|
||||
path.append(path[0])
|
||||
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
|
||||
|
||||
for x in [self.helipad_x1, self.helipad_x2]:
|
||||
flagy1 = self.helipad_y
|
||||
flagy2 = flagy1 + 50/SCALE
|
||||
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(1,1,1) )
|
||||
self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) )
|
||||
|
||||
return self.viewer.render(return_rgb_array = mode=='rgb_array')
|
||||
|
||||
def close(self):
|
||||
if self.viewer is not None:
|
||||
self.viewer.close()
|
||||
self.viewer = None
|
||||
|
||||
class LunarLanderContinuous(LunarLander):
|
||||
continuous = True
|
||||
|
||||
def heuristic(env, s):
|
||||
# Heuristic for:
|
||||
# 1. Testing.
|
||||
# 2. Demonstration rollout.
|
||||
angle_targ = s[0]*0.5 + s[2]*1.0 # angle should point towards the center (s[0] is the horizontal coordinate, s[2] the horizontal speed)
|
||||
if angle_targ > 0.4: angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
|
||||
if angle_targ < -0.4: angle_targ = -0.4
|
||||
hover_targ = 0.55*np.abs(s[0]) # target y should be proportional to horizontal offset
|
||||
|
||||
# PID controller: s[4] angle, s[5] angularSpeed
|
||||
angle_todo = (angle_targ - s[4])*0.5 - (s[5])*1.0
|
||||
#print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))
|
||||
|
||||
# PID controller: s[1] vertical coordinate s[3] vertical speed
|
||||
hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
|
||||
#print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))
|
||||
|
||||
if s[6] or s[7]: # legs have contact
|
||||
angle_todo = 0
|
||||
hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact
|
||||
|
||||
if env.continuous:
|
||||
a = np.array( [hover_todo*20 - 1, -angle_todo*20] )
|
||||
a = np.clip(a, -1, +1)
|
||||
else:
|
||||
a = 0
|
||||
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2
|
||||
elif angle_todo < -0.05: a = 3
|
||||
elif angle_todo > +0.05: a = 1
|
||||
return a
|
||||
|
||||
def demo_heuristic_lander(env, seed=None, render=False):
|
||||
env.seed(seed)
|
||||
total_reward = 0
|
||||
steps = 0
|
||||
s = env.reset()
|
||||
while True:
|
||||
a = heuristic(env, s)
|
||||
s, r, done, info = env.step(a)
|
||||
total_reward += r
|
||||
|
||||
if render:
|
||||
still_open = env.render()
|
||||
if still_open == False: break
|
||||
|
||||
if steps % 20 == 0 or done:
|
||||
print("observations:", " ".join(["{:+0.2f}".format(x) for x in s]))
|
||||
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
|
||||
steps += 1
|
||||
if done: break
|
||||
return total_reward
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
demo_heuristic_lander(LunarLander(), render=True)
|
||||
|
||||
|
@ -0,0 +1,13 @@
|
||||
from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
|
||||
|
||||
def test_lunar_lander():
|
||||
_test_lander(LunarLander(), seed=0)
|
||||
|
||||
def test_lunar_lander_continuous():
|
||||
_test_lander(LunarLanderContinuous(), seed=0)
|
||||
|
||||
def _test_lander(env, seed=None, render=False):
|
||||
total_reward = demo_heuristic_lander(env, seed=seed, render=render)
|
||||
assert total_reward > 100
|
||||
|
||||
|
@ -0,0 +1,6 @@
|
||||
from gym.envs.classic_control.cartpole import CartPoleEnv
|
||||
from gym.envs.classic_control.mountain_car import MountainCarEnv
|
||||
from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
|
||||
from gym.envs.classic_control.pendulum import PendulumEnv
|
||||
from gym.envs.classic_control.acrobot import AcrobotEnv
|
||||
|
@ -0,0 +1,305 @@
|
||||
"""classic Acrobot task"""
|
||||
import numpy as np
|
||||
from numpy import sin, cos, pi
|
||||
|
||||
from gym import core, spaces
|
||||
from gym.utils import seeding
|
||||
|
||||
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
|
||||
__credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
|
||||
"William Dabney", "Jonathan P. How"]
|
||||
__license__ = "BSD 3-Clause"
|
||||
__author__ = "Christoph Dann <cdann@cdann.de>"
|
||||
|
||||
# SOURCE:
|
||||
# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
|
||||
|
||||
class AcrobotEnv(core.Env):
|
||||
|
||||
"""
|
||||
Acrobot is a 2-link pendulum with only the second joint actuated
|
||||
Initially, both links point downwards. The goal is to swing the
|
||||
end-effector at a height at least the length of one link above the base.
|
||||
Both links can swing freely and can pass by each other, i.e., they don't
|
||||
collide when they have the same angle.
|
||||
**STATE:**
|
||||
The state consists of the sin() and cos() of the two rotational joint
|
||||
angles and the joint angular velocities :
|
||||
[cos(theta1) sin(theta1) cos(theta2) sin(theta2) thetaDot1 thetaDot2].
|
||||
For the first link, an angle of 0 corresponds to the link pointing downwards.
|
||||
The angle of the second link is relative to the angle of the first link.
|
||||
An angle of 0 corresponds to having the same angle between the two links.
|
||||
A state of [1, 0, 1, 0, ..., ...] means that both links point downwards.
|
||||
**ACTIONS:**
|
||||
The action is either applying +1, 0 or -1 torque on the joint between
|
||||
the two pendulum links.
|
||||
.. note::
|
||||
The dynamics equations were missing some terms in the NIPS paper which
|
||||
are present in the book. R. Sutton confirmed in personal correspondence
|
||||
that the experimental results shown in the paper and the book were
|
||||
generated with the equations shown in the book.
|
||||
However, there is the option to run the domain with the paper equations
|
||||
by setting book_or_nips = 'nips'
|
||||
**REFERENCE:**
|
||||
.. seealso::
|
||||
R. Sutton: Generalization in Reinforcement Learning:
|
||||
Successful Examples Using Sparse Coarse Coding (NIPS 1996)
|
||||
.. seealso::
|
||||
R. Sutton and A. G. Barto:
|
||||
Reinforcement learning: An introduction.
|
||||
Cambridge: MIT press, 1998.
|
||||
.. warning::
|
||||
This version of the domain uses the Runge-Kutta method for integrating
|
||||
the system dynamics and is more realistic, but also considerably harder
|
||||
than the original version which employs Euler integration,
|
||||
see the AcrobotLegacy class.
|
||||
"""
|
||||
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : 15
|
||||
}
|
||||
|
||||
dt = .2
|
||||
|
||||
LINK_LENGTH_1 = 1. # [m]
|
||||
LINK_LENGTH_2 = 1. # [m]
|
||||
LINK_MASS_1 = 1. #: [kg] mass of link 1
|
||||
LINK_MASS_2 = 1. #: [kg] mass of link 2
|
||||
LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1
|
||||
LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2
|
||||
LINK_MOI = 1. #: moments of inertia for both links
|
||||
|
||||
MAX_VEL_1 = 4 * np.pi
|
||||
MAX_VEL_2 = 9 * np.pi
|
||||
|
||||
AVAIL_TORQUE = [-1., 0., +1]
|
||||
|
||||
torque_noise_max = 0.
|
||||
|
||||
#: use dynamics equations from the nips paper or the book
|
||||
book_or_nips = "book"
|
||||
action_arrow = None
|
||||
domain_fig = None
|
||||
actions_num = 3
|
||||
|
||||
def __init__(self):
|
||||
self.viewer = None
|
||||
high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
|
||||
low = -high
|
||||
self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
|
||||
self.action_space = spaces.Discrete(3)
|
||||
self.state = None
|
||||
self.seed()
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def reset(self):
|
||||
self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
|
||||
return self._get_ob()
|
||||
|
||||
def step(self, a):
|
||||
s = self.state
|
||||
torque = self.AVAIL_TORQUE[a]
|
||||
|
||||
# Add noise to the force action
|
||||
if self.torque_noise_max > 0:
|
||||
torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
|
||||
|
||||
# Now, augment the state with our force action so it can be passed to
|
||||
# _dsdt
|
||||
s_augmented = np.append(s, torque)
|
||||
|
||||
ns = rk4(self._dsdt, s_augmented, [0, self.dt])
|
||||
# only care about final timestep of integration returned by integrator
|
||||
ns = ns[-1]
|
||||
ns = ns[:4] # omit action
|
||||
# ODEINT IS TOO SLOW!
|
||||
# ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
|
||||
# self.s_continuous = ns_continuous[-1] # We only care about the state
|
||||
# at the ''final timestep'', self.dt
|
||||
|
||||
ns[0] = wrap(ns[0], -pi, pi)
|
||||
ns[1] = wrap(ns[1], -pi, pi)
|
||||
ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
|
||||
ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
|
||||
self.state = ns
|
||||
terminal = self._terminal()
|
||||
reward = -1. if not terminal else 0.
|
||||
return (self._get_ob(), reward, terminal, {})
|
||||
|
||||
def _get_ob(self):
|
||||
s = self.state
|
||||
return np.array([cos(s[0]), sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]])
|
||||
|
||||
def _terminal(self):
|
||||
s = self.state
|
||||
return bool(-np.cos(s[0]) - np.cos(s[1] + s[0]) > 1.)
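# -cos(theta1) - cos(theta1 + theta2) is the height of the end effector above the
# pivot in link lengths (both links have length 1), so > 1 matches the goal of
# raising the tip at least one link length above the base.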
|
||||
|
||||
def _dsdt(self, s_augmented, t):
|
||||
m1 = self.LINK_MASS_1
|
||||
m2 = self.LINK_MASS_2
|
||||
l1 = self.LINK_LENGTH_1
|
||||
lc1 = self.LINK_COM_POS_1
|
||||
lc2 = self.LINK_COM_POS_2
|
||||
I1 = self.LINK_MOI
|
||||
I2 = self.LINK_MOI
|
||||
g = 9.8
|
||||
a = s_augmented[-1]
|
||||
s = s_augmented[:-1]
|
||||
theta1 = s[0]
|
||||
theta2 = s[1]
|
||||
dtheta1 = s[2]
|
||||
dtheta2 = s[3]
|
||||
d1 = m1 * lc1 ** 2 + m2 * \
|
||||
(l1 ** 2 + lc2 ** 2 + 2 * l1 * lc2 * np.cos(theta2)) + I1 + I2
|
||||
d2 = m2 * (lc2 ** 2 + l1 * lc2 * np.cos(theta2)) + I2
|
||||
phi2 = m2 * lc2 * g * np.cos(theta1 + theta2 - np.pi / 2.)
|
||||
phi1 = - m2 * l1 * lc2 * dtheta2 ** 2 * np.sin(theta2) \
|
||||
- 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * np.sin(theta2) \
|
||||
+ (m1 * lc1 + m2 * l1) * g * np.cos(theta1 - np.pi / 2) + phi2
|
||||
if self.book_or_nips == "nips":
|
||||
# the following line is consistent with the description in the
|
||||
# paper
|
||||
ddtheta2 = (a + d2 / d1 * phi1 - phi2) / \
|
||||
(m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
|
||||
else:
|
||||
# the following line is consistent with the java implementation and the
|
||||
# book
|
||||
ddtheta2 = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1 ** 2 * np.sin(theta2) - phi2) \
|
||||
/ (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
|
||||
ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
|
||||
return (dtheta1, dtheta2, ddtheta1, ddtheta2, 0.)
|
||||
|
||||
def render(self, mode='human'):
|
||||
from gym.envs.classic_control import rendering
|
||||
|
||||
s = self.state
|
||||
|
||||
if self.viewer is None:
|
||||
self.viewer = rendering.Viewer(500,500)
|
||||
bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default
|
||||
self.viewer.set_bounds(-bound,bound,-bound,bound)
|
||||
|
||||
if s is None: return None
|
||||
|
||||
p1 = [-self.LINK_LENGTH_1 *
|
||||
np.cos(s[0]), self.LINK_LENGTH_1 * np.sin(s[0])]
|
||||
|
||||
p2 = [p1[0] - self.LINK_LENGTH_2 * np.cos(s[0] + s[1]),
|
||||
p1[1] + self.LINK_LENGTH_2 * np.sin(s[0] + s[1])]
|
||||
|
||||
xys = np.array([[0,0], p1, p2])[:,::-1]
|
||||
thetas = [s[0]-np.pi/2, s[0]+s[1]-np.pi/2]
|
||||
link_lengths = [self.LINK_LENGTH_1, self.LINK_LENGTH_2]
|
||||
|
||||
self.viewer.draw_line((-2.2, 1), (2.2, 1))
|
||||
for ((x,y),th,llen) in zip(xys, thetas, link_lengths):
|
||||
l,r,t,b = 0, llen, .1, -.1
|
||||
jtransform = rendering.Transform(rotation=th, translation=(x,y))
|
||||
link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
|
||||
link.add_attr(jtransform)
|
||||
link.set_color(0,.8, .8)
|
||||
circ = self.viewer.draw_circle(.1)
|
||||
circ.set_color(.8, .8, 0)
|
||||
circ.add_attr(jtransform)
|
||||
|
||||
return self.viewer.render(return_rgb_array = mode=='rgb_array')
|
||||
|
||||
def close(self):
|
||||
if self.viewer:
|
||||
self.viewer.close()
|
||||
self.viewer = None
|
||||
|
||||
def wrap(x, m, M):
|
||||
"""
|
||||
:param x: a scalar
|
||||
:param m: minimum possible value in range
|
||||
:param M: maximum possible value in range
|
||||
Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
|
||||
truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
|
||||
For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
|
||||
"""
|
||||
diff = M - m
|
||||
while x > M:
|
||||
x = x - diff
|
||||
while x < m:
|
||||
x = x + diff
|
||||
return x
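# Example (sketch): wrap(3.5*pi, -pi, pi) first subtracts 2*pi to get 1.5*pi, then
# subtracts 2*pi again and returns -0.5*pi, the equivalent angle inside [-pi, pi].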
|
||||
|
||||
def bound(x, m, M=None):
|
||||
"""
|
||||
:param x: scalar
|
||||
Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
|
||||
have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
|
||||
"""
|
||||
if M is None:
|
||||
M = m[1]
|
||||
m = m[0]
|
||||
# bound x between min (m) and Max (M)
|
||||
return min(max(x, m), M)
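# Examples (sketch): bound(5, 0, 1) and bound(5, (0, 1)) both clamp to 1;
# bound(-3, (0, 1)) clamps to 0.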
|
||||
|
||||
|
||||
def rk4(derivs, y0, t, *args, **kwargs):
|
||||
"""
|
||||
Integrate a 1D or ND system of ODEs using 4th-order Runge-Kutta.
|
||||
This is a toy implementation which may be useful if you find
|
||||
yourself stranded on a system w/o scipy. Otherwise use
|
||||
:func:`scipy.integrate`.
|
||||
*y0*
|
||||
initial state vector
|
||||
*t*
|
||||
sample times
|
||||
*derivs*
|
||||
returns the derivative of the system and has the
|
||||
signature ``dy = derivs(yi, ti)``
|
||||
*args*
|
||||
additional arguments passed to the derivative function
|
||||
*kwargs*
|
||||
additional keyword arguments passed to the derivative function
|
||||
Example 1 ::
|
||||
## 2D system
|
||||
def derivs6(x,t):
|
||||
d1 = x[0] + 2*x[1]
|
||||
d2 = -3*x[0] + 4*x[1]
|
||||
return (d1, d2)
|
||||
dt = 0.0005
|
||||
t = arange(0.0, 2.0, dt)
|
||||
y0 = (1,2)
|
||||
yout = rk4(derivs6, y0, t)
|
||||
Example 2::
|
||||
## 1D system
|
||||
alpha = 2
|
||||
def derivs(x,t):
|
||||
return -alpha*x + exp(-t)
|
||||
y0 = 1
|
||||
yout = rk4(derivs, y0, t)
|
||||
If you have access to scipy, you should probably be using the
|
||||
scipy.integrate tools rather than this function.
|
||||
"""
|
||||
|
||||
try:
|
||||
Ny = len(y0)
|
||||
except TypeError:
|
||||
yout = np.zeros((len(t),), np.float_)
|
||||
else:
|
||||
yout = np.zeros((len(t), Ny), np.float_)
|
||||
|
||||
yout[0] = y0
|
||||
|
||||
|
||||
for i in np.arange(len(t) - 1):
|
||||
|
||||
thist = t[i]
|
||||
dt = t[i + 1] - thist
|
||||
dt2 = dt / 2.0
|
||||
y0 = yout[i]
|
||||
|
||||
k1 = np.asarray(derivs(y0, thist, *args, **kwargs))
|
||||
k2 = np.asarray(derivs(y0 + dt2 * k1, thist + dt2, *args, **kwargs))
|
||||
k3 = np.asarray(derivs(y0 + dt2 * k2, thist + dt2, *args, **kwargs))
|
||||
k4 = np.asarray(derivs(y0 + dt * k3, thist + dt, *args, **kwargs))
|
||||
yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
|
||||
return yout
|
Binary file not shown.
@ -0,0 +1,193 @@
|
||||
"""
|
||||
Classic cart-pole system implemented by Rich Sutton et al.
|
||||
Copied from http://incompleteideas.net/sutton/book/code/pole.c
|
||||
permalink: https://perma.cc/C9ZM-652R
|
||||
"""
|
||||
|
||||
import math
|
||||
import gym
|
||||
from gym import spaces, logger
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
|
||||
class CartPoleEnv(gym.Env):
|
||||
"""
|
||||
Description:
|
||||
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
|
||||
|
||||
Source:
|
||||
This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson
|
||||
|
||||
Observation:
|
||||
Type: Box(4)
|
||||
Num Observation Min Max
|
||||
0 Cart Position -4.8 4.8
|
||||
1 Cart Velocity -Inf Inf
|
||||
2 Pole Angle -24° 24°
|
||||
3 Pole Velocity At Tip -Inf Inf
|
||||
|
||||
Actions:
|
||||
Type: Discrete(2)
|
||||
Num Action
|
||||
0 Push cart to the left
|
||||
1 Push cart to the right
|
||||
|
||||
Note: The amount the velocity is reduced or increased is not fixed; it depends on the angle the pole is pointing, because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it.
|
||||
|
||||
Reward:
|
||||
Reward is 1 for every step taken, including the termination step
|
||||
|
||||
Starting State:
|
||||
All observations are assigned a uniform random value between ±0.05
|
||||
|
||||
Episode Termination:
|
||||
Pole Angle is more than ±12°
|
||||
Cart Position is more than ±2.4 (center of the cart reaches the edge of the display)
|
||||
Episode length is greater than 200
|
||||
Solved Requirements
|
||||
Considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.
|
||||
"""
|
||||
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : 50
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.gravity = 9.8
|
||||
self.masscart = 1.0
|
||||
self.masspole = 0.1
|
||||
self.total_mass = (self.masspole + self.masscart)
|
||||
self.length = 0.5 # actually half the pole's length
|
||||
self.polemass_length = (self.masspole * self.length)
|
||||
self.force_mag = 10.0
|
||||
self.tau = 0.02 # seconds between state updates
|
||||
self.kinematics_integrator = 'euler'
|
||||
|
||||
# Angle at which to fail the episode
|
||||
self.theta_threshold_radians = 12 * 2 * math.pi / 360
|
||||
self.x_threshold = 2.4
|
||||
|
||||
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
|
||||
high = np.array([
|
||||
self.x_threshold * 2,
|
||||
np.finfo(np.float32).max,
|
||||
self.theta_threshold_radians * 2,
|
||||
np.finfo(np.float32).max])
|
||||
|
||||
self.action_space = spaces.Discrete(2)
|
||||
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
|
||||
|
||||
self.seed()
|
||||
self.viewer = None
|
||||
self.state = None
|
||||
|
||||
self.steps_beyond_done = None
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def step(self, action):
|
||||
assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
|
||||
state = self.state
|
||||
x, x_dot, theta, theta_dot = state
|
||||
force = self.force_mag if action==1 else -self.force_mag
|
||||
costheta = math.cos(theta)
|
||||
sintheta = math.sin(theta)
|
||||
temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
|
||||
thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
|
||||
xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
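# (The three lines above implement the cart-pole dynamics from the Barto, Sutton
#  and Anderson formulation referenced in the class docstring, with the pole
#  treated as a uniform rod of half-length self.length.)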
|
||||
if self.kinematics_integrator == 'euler':
|
||||
x = x + self.tau * x_dot
|
||||
x_dot = x_dot + self.tau * xacc
|
||||
theta = theta + self.tau * theta_dot
|
||||
theta_dot = theta_dot + self.tau * thetaacc
|
||||
else: # semi-implicit euler
|
||||
x_dot = x_dot + self.tau * xacc
|
||||
x = x + self.tau * x_dot
|
||||
theta_dot = theta_dot + self.tau * thetaacc
|
||||
theta = theta + self.tau * theta_dot
|
||||
self.state = (x,x_dot,theta,theta_dot)
|
||||
done = x < -self.x_threshold \
|
||||
or x > self.x_threshold \
|
||||
or theta < -self.theta_threshold_radians \
|
||||
or theta > self.theta_threshold_radians
|
||||
done = bool(done)
|
||||
|
||||
if not done:
|
||||
reward = 1.0
|
||||
elif self.steps_beyond_done is None:
|
||||
# Pole just fell!
|
||||
self.steps_beyond_done = 0
|
||||
reward = 1.0
|
||||
else:
|
||||
if self.steps_beyond_done == 0:
|
||||
logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
|
||||
self.steps_beyond_done += 1
|
||||
reward = 0.0
|
||||
|
||||
return np.array(self.state), reward, done, {}
|
||||
|
||||
def reset(self):
|
||||
self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
|
||||
self.steps_beyond_done = None
|
||||
return np.array(self.state)
|
||||
|
||||
def render(self, mode='human'):
|
||||
screen_width = 600
|
||||
screen_height = 400
|
||||
|
||||
world_width = self.x_threshold*2
|
||||
scale = screen_width/world_width
|
||||
carty = 100 # TOP OF CART
|
||||
polewidth = 10.0
|
||||
polelen = scale * (2 * self.length)
|
||||
cartwidth = 50.0
|
||||
cartheight = 30.0
|
||||
|
||||
if self.viewer is None:
|
||||
from gym.envs.classic_control import rendering
|
||||
self.viewer = rendering.Viewer(screen_width, screen_height)
|
||||
l,r,t,b = -cartwidth/2, cartwidth/2, cartheight/2, -cartheight/2
|
||||
axleoffset =cartheight/4.0
|
||||
cart = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
|
||||
self.carttrans = rendering.Transform()
|
||||
cart.add_attr(self.carttrans)
|
||||
self.viewer.add_geom(cart)
|
||||
l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
|
||||
pole = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
|
||||
pole.set_color(.8,.6,.4)
|
||||
self.poletrans = rendering.Transform(translation=(0, axleoffset))
|
||||
pole.add_attr(self.poletrans)
|
||||
pole.add_attr(self.carttrans)
|
||||
self.viewer.add_geom(pole)
|
||||
self.axle = rendering.make_circle(polewidth/2)
|
||||
self.axle.add_attr(self.poletrans)
|
||||
self.axle.add_attr(self.carttrans)
|
||||
self.axle.set_color(.5,.5,.8)
|
||||
self.viewer.add_geom(self.axle)
|
||||
self.track = rendering.Line((0,carty), (screen_width,carty))
|
||||
self.track.set_color(0,0,0)
|
||||
self.viewer.add_geom(self.track)
|
||||
|
||||
self._pole_geom = pole
|
||||
|
||||
if self.state is None: return None
|
||||
|
||||
# Edit the pole polygon vertex
|
||||
pole = self._pole_geom
|
||||
l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
|
||||
pole.v = [(l,b), (l,t), (r,t), (r,b)]
|
||||
|
||||
x = self.state
|
||||
cartx = x[0]*scale+screen_width/2.0 # MIDDLE OF CART
|
||||
self.carttrans.set_translation(cartx, carty)
|
||||
self.poletrans.set_rotation(-x[2])
|
||||
|
||||
return self.viewer.render(return_rgb_array = mode=='rgb_array')
|
||||
|
||||
def close(self):
|
||||
if self.viewer:
|
||||
self.viewer.close()
|
||||
self.viewer = None
|
@ -0,0 +1,148 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@author: Olivier Sigaud
|
||||
|
||||
A merge between two sources:
|
||||
|
||||
* Adaptation of the MountainCar Environment from the "FAReinforcement" library
|
||||
of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
|
||||
and then modified by Arnaud de Broissia
|
||||
|
||||
* the OpenAI/gym MountainCar environment
|
||||
itself from
|
||||
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
|
||||
permalink: https://perma.cc/6Z2N-PFWC
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
|
||||
class Continuous_MountainCarEnv(gym.Env):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second': 30
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.min_action = -1.0
|
||||
self.max_action = 1.0
|
||||
self.min_position = -1.2
|
||||
self.max_position = 0.6
|
||||
self.max_speed = 0.07
|
||||
self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
|
||||
self.power = 0.0015
|
||||
|
||||
self.low_state = np.array([self.min_position, -self.max_speed])
|
||||
self.high_state = np.array([self.max_position, self.max_speed])
|
||||
|
||||
self.viewer = None
|
||||
|
||||
self.action_space = spaces.Box(low=self.min_action, high=self.max_action,
|
||||
shape=(1,), dtype=np.float32)
|
||||
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
|
||||
dtype=np.float32)
|
||||
|
||||
self.seed()
|
||||
self.reset()
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def step(self, action):
|
||||
|
||||
position = self.state[0]
|
||||
velocity = self.state[1]
|
||||
force = min(max(action[0], -1.0), 1.0)
|
||||
|
||||
velocity += force*self.power -0.0025 * math.cos(3*position)
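# The -0.0025*cos(3*position) term approximates gravity along the track, whose
# height profile follows sin(3*x) (see _height below); force*self.power is the engine.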
|
||||
if (velocity > self.max_speed): velocity = self.max_speed
|
||||
if (velocity < -self.max_speed): velocity = -self.max_speed
|
||||
position += velocity
|
||||
if (position > self.max_position): position = self.max_position
|
||||
if (position < self.min_position): position = self.min_position
|
||||
if (position==self.min_position and velocity<0): velocity = 0
|
||||
|
||||
done = bool(position >= self.goal_position)
|
||||
|
||||
reward = 0
|
||||
if done:
|
||||
reward = 100.0
|
||||
reward-= math.pow(action[0],2)*0.1
|
||||
|
||||
self.state = np.array([position, velocity])
|
||||
return self.state, reward, done, {}
|
||||
|
||||
def reset(self):
|
||||
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
|
||||
return np.array(self.state)
|
||||
|
||||
# def get_state(self):
|
||||
# return self.state
|
||||
|
||||
def _height(self, xs):
|
||||
return np.sin(3 * xs)*.45+.55
|
||||
|
||||
def render(self, mode='human'):
|
||||
screen_width = 600
|
||||
screen_height = 400
|
||||
|
||||
world_width = self.max_position - self.min_position
|
||||
scale = screen_width/world_width
|
||||
carwidth=40
|
||||
carheight=20
|
||||
|
||||
|
||||
if self.viewer is None:
|
||||
from gym.envs.classic_control import rendering
|
||||
self.viewer = rendering.Viewer(screen_width, screen_height)
|
||||
xs = np.linspace(self.min_position, self.max_position, 100)
|
||||
ys = self._height(xs)
|
||||
xys = list(zip((xs-self.min_position)*scale, ys*scale))
|
||||
|
||||
self.track = rendering.make_polyline(xys)
|
||||
self.track.set_linewidth(4)
|
||||
self.viewer.add_geom(self.track)
|
||||
|
||||
clearance = 10
|
||||
|
||||
l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
|
||||
car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
|
||||
car.add_attr(rendering.Transform(translation=(0, clearance)))
|
||||
self.cartrans = rendering.Transform()
|
||||
car.add_attr(self.cartrans)
|
||||
self.viewer.add_geom(car)
|
||||
frontwheel = rendering.make_circle(carheight/2.5)
|
||||
frontwheel.set_color(.5, .5, .5)
|
||||
frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
|
||||
frontwheel.add_attr(self.cartrans)
|
||||
self.viewer.add_geom(frontwheel)
|
||||
backwheel = rendering.make_circle(carheight/2.5)
|
||||
backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
|
||||
backwheel.add_attr(self.cartrans)
|
||||
backwheel.set_color(.5, .5, .5)
|
||||
self.viewer.add_geom(backwheel)
|
||||
flagx = (self.goal_position-self.min_position)*scale
|
||||
flagy1 = self._height(self.goal_position)*scale
|
||||
flagy2 = flagy1 + 50
|
||||
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
|
||||
self.viewer.add_geom(flagpole)
|
||||
flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
|
||||
flag.set_color(.8,.8,0)
|
||||
self.viewer.add_geom(flag)
|
||||
|
||||
pos = self.state[0]
|
||||
self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
|
||||
self.cartrans.set_rotation(math.cos(3 * pos))
|
||||
|
||||
return self.viewer.render(return_rgb_array = mode=='rgb_array')
|
||||
|
||||
def close(self):
|
||||
if self.viewer:
|
||||
self.viewer.close()
|
||||
self.viewer = None
|
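
# Minimal usage sketch, assuming the class above is importable as-is: a short
# random-action rollout. The raw environment has no built-in step limit, so the
# loop is capped here.
if __name__ == '__main__':
    env = Continuous_MountainCarEnv()
    obs = env.reset()
    for _ in range(1000):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            break
    env.close()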
@ -0,0 +1,121 @@
|
||||
"""
|
||||
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
|
||||
permalink: https://perma.cc/6Z2N-PFWC
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
|
||||
class MountainCarEnv(gym.Env):
|
||||
metadata = {
|
||||
'render.modes': ['human', 'rgb_array'],
|
||||
'video.frames_per_second': 30
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.min_position = -1.2
|
||||
self.max_position = 0.6
|
||||
self.max_speed = 0.07
|
||||
self.goal_position = 0.5
|
||||
|
||||
self.low = np.array([self.min_position, -self.max_speed])
|
||||
self.high = np.array([self.max_position, self.max_speed])
|
||||
|
||||
self.viewer = None
|
||||
|
||||
self.action_space = spaces.Discrete(3)
|
||||
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
|
||||
|
||||
self.seed()
|
||||
self.reset()
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))

        position, velocity = self.state
        # Discrete actions: 0 = push left, 1 = no push, 2 = push right,
        # so (action - 1) is in {-1, 0, +1}; gravity again follows cos(3*position)
        velocity += (action-1)*0.001 + math.cos(3*position)*(-0.0025)
        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)
        if (position == self.min_position and velocity < 0): velocity = 0

        done = bool(position >= self.goal_position)
        # Constant -1 reward per step, so shorter episodes score higher
        reward = -1.0

        self.state = (position, velocity)
        return np.array(self.state), reward, done, {}
|
||||
|
||||
def reset(self):
|
||||
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
|
||||
return np.array(self.state)
|
||||
|
||||
def _height(self, xs):
|
||||
return np.sin(3 * xs)*.45+.55
|
||||
|
||||
def render(self, mode='human'):
|
||||
screen_width = 600
|
||||
screen_height = 400
|
||||
|
||||
world_width = self.max_position - self.min_position
|
||||
scale = screen_width/world_width
|
||||
carwidth=40
|
||||
carheight=20
|
||||
|
||||
|
||||
if self.viewer is None:
|
||||
from gym.envs.classic_control import rendering
|
||||
self.viewer = rendering.Viewer(screen_width, screen_height)
|
||||
xs = np.linspace(self.min_position, self.max_position, 100)
|
||||
ys = self._height(xs)
|
||||
xys = list(zip((xs-self.min_position)*scale, ys*scale))
|
||||
|
||||
self.track = rendering.make_polyline(xys)
|
||||
self.track.set_linewidth(4)
|
||||
self.viewer.add_geom(self.track)
|
||||
|
||||
clearance = 10
|
||||
|
||||
l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
|
||||
car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
|
||||
car.add_attr(rendering.Transform(translation=(0, clearance)))
|
||||
self.cartrans = rendering.Transform()
|
||||
car.add_attr(self.cartrans)
|
||||
self.viewer.add_geom(car)
|
||||
frontwheel = rendering.make_circle(carheight/2.5)
|
||||
frontwheel.set_color(.5, .5, .5)
|
||||
frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
|
||||
frontwheel.add_attr(self.cartrans)
|
||||
self.viewer.add_geom(frontwheel)
|
||||
backwheel = rendering.make_circle(carheight/2.5)
|
||||
backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
|
||||
backwheel.add_attr(self.cartrans)
|
||||
backwheel.set_color(.5, .5, .5)
|
||||
self.viewer.add_geom(backwheel)
|
||||
flagx = (self.goal_position-self.min_position)*scale
|
||||
flagy1 = self._height(self.goal_position)*scale
|
||||
flagy2 = flagy1 + 50
|
||||
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
|
||||
self.viewer.add_geom(flagpole)
|
||||
flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
|
||||
flag.set_color(.8,.8,0)
|
||||
self.viewer.add_geom(flag)
|
||||
|
||||
pos = self.state[0]
|
||||
self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
|
||||
self.cartrans.set_rotation(math.cos(3 * pos))
|
||||
|
||||
return self.viewer.render(return_rgb_array = mode=='rgb_array')
|
||||
|
||||
def close(self):
|
||||
if self.viewer:
|
||||
self.viewer.close()
|
||||
self.viewer = None
|
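
# Minimal usage sketch, assuming this module is on the path; the discrete
# environment is driven exactly like the continuous one above, except that
# actions are plain integers sampled from Discrete(3):
#
#     env = MountainCarEnv()
#     obs = env.reset()
#     obs, reward, done, info = env.step(env.action_space.sample())
#     env.close()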
@ -0,0 +1,90 @@
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
from os import path
|
||||
|
||||
class PendulumEnv(gym.Env):
|
||||
metadata = {
|
||||
'render.modes' : ['human', 'rgb_array'],
|
||||
'video.frames_per_second' : 30
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.max_speed=8
|
||||
self.max_torque=2.
|
||||
self.dt=.05
|
||||
self.viewer = None
|
||||
|
||||
high = np.array([1., 1., self.max_speed])
|
||||
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32)
|
||||
self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
|
||||
|
||||
self.seed()
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
    def step(self, u):
        th, thdot = self.state # th := theta

        # Pendulum parameters: gravity, mass, rod length, integration step
        g = 10.
        m = 1.
        l = 1.
        dt = self.dt

        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u # for rendering
        # Cost penalises deviation from upright (theta = 0), angular velocity, and torque
        costs = angle_normalize(th)**2 + .1*thdot**2 + .001*(u**2)

        # Euler integration of the pendulum dynamics
        newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*u) * dt
        newth = th + newthdot*dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) #pylint: disable=E1111

        self.state = np.array([newth, newthdot])
        # Reward is the negative cost; the episode never terminates on its own
        return self._get_obs(), -costs, False, {}
|
||||
|
||||
def reset(self):
|
||||
high = np.array([np.pi, 1])
|
||||
self.state = self.np_random.uniform(low=-high, high=high)
|
||||
self.last_u = None
|
||||
return self._get_obs()
|
||||
|
||||
    def _get_obs(self):
        theta, thetadot = self.state
        # Observe (cos, sin) of the angle rather than the raw angle to avoid wrap-around discontinuities
        return np.array([np.cos(theta), np.sin(theta), thetadot])
|
||||
|
||||
def render(self, mode='human'):
|
||||
|
||||
if self.viewer is None:
|
||||
from gym.envs.classic_control import rendering
|
||||
self.viewer = rendering.Viewer(500,500)
|
||||
self.viewer.set_bounds(-2.2,2.2,-2.2,2.2)
|
||||
rod = rendering.make_capsule(1, .2)
|
||||
rod.set_color(.8, .3, .3)
|
||||
self.pole_transform = rendering.Transform()
|
||||
rod.add_attr(self.pole_transform)
|
||||
self.viewer.add_geom(rod)
|
||||
axle = rendering.make_circle(.05)
|
||||
axle.set_color(0,0,0)
|
||||
self.viewer.add_geom(axle)
|
||||
fname = path.join(path.dirname(__file__), "assets/clockwise.png")
|
||||
self.img = rendering.Image(fname, 1., 1.)
|
||||
self.imgtrans = rendering.Transform()
|
||||
self.img.add_attr(self.imgtrans)
|
||||
|
||||
self.viewer.add_onetime(self.img)
|
||||
self.pole_transform.set_rotation(self.state[0] + np.pi/2)
|
||||
if self.last_u:
|
||||
self.imgtrans.scale = (-self.last_u/2, np.abs(self.last_u)/2)
|
||||
|
||||
return self.viewer.render(return_rgb_array = mode=='rgb_array')
|
||||
|
||||
def close(self):
|
||||
if self.viewer:
|
||||
self.viewer.close()
|
||||
self.viewer = None
|
||||
|
||||
def angle_normalize(x):
|
||||
return (((x+np.pi) % (2*np.pi)) - np.pi)
|
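
# Example (illustrative): angle_normalize wraps any angle into [-pi, pi), e.g.
#     angle_normalize(3*np.pi/2)  -> -np.pi/2
#     angle_normalize(2*np.pi)    ->  0.0
# which keeps the quadratic angle cost in step() well-behaved across full turns.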
@ -0,0 +1,360 @@
|
||||
"""
|
||||
2D rendering framework
|
||||
"""
|
||||
from __future__ import division
|
||||
import os
|
||||
import six
|
||||
import sys
|
||||
|
||||
if "Apple" in sys.version:
|
||||
if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
|
||||
os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
|
||||
# (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
|
||||
|
||||
from gym.utils import reraise
|
||||
from gym import error
|
||||
|
||||
try:
|
||||
import pyglet
|
||||
except ImportError as e:
|
||||
reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
|
||||
|
||||
try:
|
||||
from pyglet.gl import *
|
||||
except ImportError as e:
|
||||
reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
RAD2DEG = 57.29577951308232
|
||||
|
||||
def get_display(spec):
|
||||
"""Convert a display specification (such as :0) into an actual Display
|
||||
object.
|
||||
|
||||
Pyglet only supports multiple Displays on Linux.
|
||||
"""
|
||||
if spec is None:
|
||||
return None
|
||||
elif isinstance(spec, six.string_types):
|
||||
return pyglet.canvas.Display(spec)
|
||||
else:
|
||||
raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
|
||||
|
||||
class Viewer(object):
|
||||
def __init__(self, width, height, display=None):
|
||||
display = get_display(display)
|
||||
|
||||
self.width = width
|
||||
self.height = height
|
||||
self.window = pyglet.window.Window(width=width, height=height, display=display)
|
||||
self.window.on_close = self.window_closed_by_user
|
||||
self.isopen = True
|
||||
self.geoms = []
|
||||
self.onetime_geoms = []
|
||||
self.transform = Transform()
|
||||
|
||||
glEnable(GL_BLEND)
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
|
||||
|
||||
def close(self):
|
||||
self.window.close()
|
||||
|
||||
def window_closed_by_user(self):
|
||||
self.isopen = False
|
||||
|
||||
def set_bounds(self, left, right, bottom, top):
|
||||
assert right > left and top > bottom
|
||||
scalex = self.width/(right-left)
|
||||
scaley = self.height/(top-bottom)
|
||||
self.transform = Transform(
|
||||
translation=(-left*scalex, -bottom*scaley),
|
||||
scale=(scalex, scaley))
|
||||
|
||||
def add_geom(self, geom):
|
||||
self.geoms.append(geom)
|
||||
|
||||
def add_onetime(self, geom):
|
||||
self.onetime_geoms.append(geom)
|
||||
|
||||
def render(self, return_rgb_array=False):
|
||||
glClearColor(1,1,1,1)
|
||||
self.window.clear()
|
||||
self.window.switch_to()
|
||||
self.window.dispatch_events()
|
||||
self.transform.enable()
|
||||
for geom in self.geoms:
|
||||
geom.render()
|
||||
for geom in self.onetime_geoms:
|
||||
geom.render()
|
||||
self.transform.disable()
|
||||
arr = None
|
||||
if return_rgb_array:
|
||||
buffer = pyglet.image.get_buffer_manager().get_color_buffer()
|
||||
image_data = buffer.get_image_data()
|
||||
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
|
||||
# In https://github.com/openai/gym-http-api/issues/2, we
|
||||
# discovered that someone using Xmonad on Arch was having
|
||||
# a window of size 598 x 398, though a 600 x 400 window
|
||||
# was requested. (Guess Xmonad was preserving a pixel for
|
||||
# the boundary.) So we use the buffer height/width rather
|
||||
# than the requested one.
|
||||
arr = arr.reshape(buffer.height, buffer.width, 4)
|
||||
arr = arr[::-1,:,0:3]
|
||||
self.window.flip()
|
||||
self.onetime_geoms = []
|
||||
return arr if return_rgb_array else self.isopen
|
||||
|
||||
# Convenience
|
||||
def draw_circle(self, radius=10, res=30, filled=True, **attrs):
|
||||
geom = make_circle(radius=radius, res=res, filled=filled)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def draw_polygon(self, v, filled=True, **attrs):
|
||||
geom = make_polygon(v=v, filled=filled)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def draw_polyline(self, v, **attrs):
|
||||
geom = make_polyline(v=v)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def draw_line(self, start, end, **attrs):
|
||||
geom = Line(start, end)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def get_array(self):
|
||||
self.window.flip()
|
||||
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
|
||||
self.window.flip()
|
||||
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
|
||||
arr = arr.reshape(self.height, self.width, 4)
|
||||
return arr[::-1,:,0:3]
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
def _add_attrs(geom, attrs):
|
||||
if "color" in attrs:
|
||||
geom.set_color(*attrs["color"])
|
||||
if "linewidth" in attrs:
|
||||
geom.set_linewidth(attrs["linewidth"])
|
||||
|
||||
class Geom(object):
|
||||
def __init__(self):
|
||||
self._color=Color((0, 0, 0, 1.0))
|
||||
self.attrs = [self._color]
|
||||
def render(self):
|
||||
for attr in reversed(self.attrs):
|
||||
attr.enable()
|
||||
self.render1()
|
||||
for attr in self.attrs:
|
||||
attr.disable()
|
||||
def render1(self):
|
||||
raise NotImplementedError
|
||||
def add_attr(self, attr):
|
||||
self.attrs.append(attr)
|
||||
def set_color(self, r, g, b):
|
||||
self._color.vec4 = (r, g, b, 1)
|
||||
|
||||
class Attr(object):
|
||||
def enable(self):
|
||||
raise NotImplementedError
|
||||
def disable(self):
|
||||
pass
|
||||
|
||||
class Transform(Attr):
|
||||
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
|
||||
self.set_translation(*translation)
|
||||
self.set_rotation(rotation)
|
||||
self.set_scale(*scale)
|
||||
def enable(self):
|
||||
glPushMatrix()
|
||||
        glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL location point
|
||||
glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
|
||||
glScalef(self.scale[0], self.scale[1], 1)
|
||||
def disable(self):
|
||||
glPopMatrix()
|
||||
def set_translation(self, newx, newy):
|
||||
self.translation = (float(newx), float(newy))
|
||||
def set_rotation(self, new):
|
||||
self.rotation = float(new)
|
||||
def set_scale(self, newx, newy):
|
||||
self.scale = (float(newx), float(newy))
|
||||
|
||||
class Color(Attr):
|
||||
def __init__(self, vec4):
|
||||
self.vec4 = vec4
|
||||
def enable(self):
|
||||
glColor4f(*self.vec4)
|
||||
|
||||
class LineStyle(Attr):
|
||||
def __init__(self, style):
|
||||
self.style = style
|
||||
def enable(self):
|
||||
glEnable(GL_LINE_STIPPLE)
|
||||
glLineStipple(1, self.style)
|
||||
def disable(self):
|
||||
glDisable(GL_LINE_STIPPLE)
|
||||
|
||||
class LineWidth(Attr):
|
||||
def __init__(self, stroke):
|
||||
self.stroke = stroke
|
||||
def enable(self):
|
||||
glLineWidth(self.stroke)
|
||||
|
||||
class Point(Geom):
|
||||
def __init__(self):
|
||||
Geom.__init__(self)
|
||||
def render1(self):
|
||||
glBegin(GL_POINTS) # draw point
|
||||
glVertex3f(0.0, 0.0, 0.0)
|
||||
glEnd()
|
||||
|
||||
class FilledPolygon(Geom):
|
||||
def __init__(self, v):
|
||||
Geom.__init__(self)
|
||||
self.v = v
|
||||
def render1(self):
|
||||
if len(self.v) == 4 : glBegin(GL_QUADS)
|
||||
elif len(self.v) > 4 : glBegin(GL_POLYGON)
|
||||
else: glBegin(GL_TRIANGLES)
|
||||
for p in self.v:
|
||||
glVertex3f(p[0], p[1],0) # draw each vertex
|
||||
glEnd()
|
||||
|
||||
def make_circle(radius=10, res=30, filled=True):
|
||||
points = []
|
||||
for i in range(res):
|
||||
ang = 2*math.pi*i / res
|
||||
points.append((math.cos(ang)*radius, math.sin(ang)*radius))
|
||||
if filled:
|
||||
return FilledPolygon(points)
|
||||
else:
|
||||
return PolyLine(points, True)
|
||||
|
||||
def make_polygon(v, filled=True):
|
||||
if filled: return FilledPolygon(v)
|
||||
else: return PolyLine(v, True)
|
||||
|
||||
def make_polyline(v):
|
||||
return PolyLine(v, False)
|
||||
|
||||
def make_capsule(length, width):
|
||||
l, r, t, b = 0, length, width/2, -width/2
|
||||
box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
|
||||
circ0 = make_circle(width/2)
|
||||
circ1 = make_circle(width/2)
|
||||
circ1.add_attr(Transform(translation=(length, 0)))
|
||||
geom = Compound([box, circ0, circ1])
|
||||
return geom
|
||||
|
||||
class Compound(Geom):
|
||||
def __init__(self, gs):
|
||||
Geom.__init__(self)
|
||||
self.gs = gs
|
||||
for g in self.gs:
|
||||
g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
|
||||
def render1(self):
|
||||
for g in self.gs:
|
||||
g.render()
|
||||
|
||||
class PolyLine(Geom):
|
||||
def __init__(self, v, close):
|
||||
Geom.__init__(self)
|
||||
self.v = v
|
||||
self.close = close
|
||||
self.linewidth = LineWidth(1)
|
||||
self.add_attr(self.linewidth)
|
||||
def render1(self):
|
||||
glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
|
||||
for p in self.v:
|
||||
glVertex3f(p[0], p[1],0) # draw each vertex
|
||||
glEnd()
|
||||
def set_linewidth(self, x):
|
||||
self.linewidth.stroke = x
|
||||
|
||||
class Line(Geom):
|
||||
def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
|
||||
Geom.__init__(self)
|
||||
self.start = start
|
||||
self.end = end
|
||||
self.linewidth = LineWidth(1)
|
||||
self.add_attr(self.linewidth)
|
||||
|
||||
def render1(self):
|
||||
glBegin(GL_LINES)
|
||||
glVertex2f(*self.start)
|
||||
glVertex2f(*self.end)
|
||||
glEnd()
|
||||
|
||||
class Image(Geom):
|
||||
def __init__(self, fname, width, height):
|
||||
Geom.__init__(self)
|
||||
self.width = width
|
||||
self.height = height
|
||||
img = pyglet.image.load(fname)
|
||||
self.img = img
|
||||
self.flip = False
|
||||
def render1(self):
|
||||
self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
|
||||
|
||||
# ================================================================
|
||||
|
||||
class SimpleImageViewer(object):
|
||||
def __init__(self, display=None, maxwidth=500):
|
||||
self.window = None
|
||||
self.isopen = False
|
||||
self.display = display
|
||||
self.maxwidth = maxwidth
|
||||
def imshow(self, arr):
|
||||
if self.window is None:
|
||||
height, width, _channels = arr.shape
|
||||
if width > self.maxwidth:
|
||||
scale = self.maxwidth / width
|
||||
width = int(scale * width)
|
||||
height = int(scale * height)
|
||||
self.window = pyglet.window.Window(width=width, height=height,
|
||||
display=self.display, vsync=False, resizable=True)
|
||||
self.width = width
|
||||
self.height = height
|
||||
self.isopen = True
|
||||
|
||||
@self.window.event
|
||||
def on_resize(width, height):
|
||||
self.width = width
|
||||
self.height = height
|
||||
|
||||
@self.window.event
|
||||
def on_close():
|
||||
self.isopen = False
|
||||
|
||||
        assert len(arr.shape) == 3, "You passed in an image with the wrong number of dimensions"
|
||||
image = pyglet.image.ImageData(arr.shape[1], arr.shape[0],
|
||||
'RGB', arr.tobytes(), pitch=arr.shape[1]*-3)
|
||||
        # 'gl' is never imported as a module here; use the names brought in by 'from pyglet.gl import *'
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
|
||||
texture = image.get_texture()
|
||||
texture.width = self.width
|
||||
texture.height = self.height
|
||||
self.window.clear()
|
||||
self.window.switch_to()
|
||||
self.window.dispatch_events()
|
||||
texture.blit(0, 0) # draw
|
||||
self.window.flip()
|
||||
def close(self):
|
||||
if self.isopen and sys.meta_path:
|
||||
# ^^^ check sys.meta_path to avoid 'ImportError: sys.meta_path is None, Python is likely shutting down'
|
||||
self.window.close()
|
||||
self.isopen = False
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
@ -0,0 +1,16 @@
|
||||
from gym.envs.mujoco.mujoco_env import MujocoEnv
|
||||
# ^^^^^ so that user gets the correct error
|
||||
# message if mujoco is not installed correctly
|
||||
from gym.envs.mujoco.ant import AntEnv
|
||||
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
|
||||
from gym.envs.mujoco.hopper import HopperEnv
|
||||
from gym.envs.mujoco.walker2d import Walker2dEnv
|
||||
from gym.envs.mujoco.humanoid import HumanoidEnv
|
||||
from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
|
||||
from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
|
||||
from gym.envs.mujoco.reacher import ReacherEnv
|
||||
from gym.envs.mujoco.swimmer import SwimmerEnv
|
||||
from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
|
||||
from gym.envs.mujoco.pusher import PusherEnv
|
||||
from gym.envs.mujoco.thrower import ThrowerEnv
|
||||
from gym.envs.mujoco.striker import StrikerEnv
|
@ -0,0 +1,45 @@
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
    def step(self, a):
        # Torso x-position before and after the simulation step gives the forward velocity
        xposbefore = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.get_body_com("torso")[0]
        forward_reward = (xposafter - xposbefore)/self.dt
        # Penalties for large actions and large external contact forces, plus an alive bonus
        ctrl_cost = .5 * np.square(a).sum()
        contact_cost = 0.5 * 1e-3 * np.sum(
            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        # Episode ends if the state blows up or the torso height leaves [0.2, 1.0]
        state = self.state_vector()
        notdone = np.isfinite(state).all() \
            and state[2] >= 0.2 and state[2] <= 1.0
        done = not notdone
        ob = self._get_obs()
        return ob, reward, done, dict(
            reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost,
            reward_contact=-contact_cost,
            reward_survive=survive_reward)
|
||||
|
||||
def _get_obs(self):
|
||||
return np.concatenate([
|
||||
self.sim.data.qpos.flat[2:],
|
||||
self.sim.data.qvel.flat,
|
||||
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
|
||||
])
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
|
||||
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.distance = self.model.stat.extent * 0.5
|
@ -0,0 +1,81 @@
|
||||
<mujoco model="ant">
|
||||
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
|
||||
<option integrator="RK4" timestep="0.01"/>
|
||||
<custom>
|
||||
<numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
|
||||
</custom>
|
||||
<default>
|
||||
<joint armature="1" damping="1" limited="true"/>
|
||||
<geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
|
||||
</default>
|
||||
<asset>
|
||||
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
|
||||
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
|
||||
<body name="torso" pos="0 0 0.75">
|
||||
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
|
||||
<geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
|
||||
<joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
|
||||
<body name="front_left_leg" pos="0 0 0">
|
||||
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
|
||||
<body name="aux_1" pos="0.2 0.2 0">
|
||||
<joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
|
||||
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
|
||||
<body pos="0.2 0.2 0">
|
||||
<joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
|
||||
<geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="front_right_leg" pos="0 0 0">
|
||||
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
|
||||
<body name="aux_2" pos="-0.2 0.2 0">
|
||||
<joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
|
||||
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
|
||||
<body pos="-0.2 0.2 0">
|
||||
<joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
|
||||
<geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="back_leg" pos="0 0 0">
|
||||
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
|
||||
<body name="aux_3" pos="-0.2 -0.2 0">
|
||||
<joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
|
||||
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
|
||||
<body pos="-0.2 -0.2 0">
|
||||
<joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
|
||||
<geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="right_back_leg" pos="0 0 0">
|
||||
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
|
||||
<body name="aux_4" pos="0.2 -0.2 0">
|
||||
<joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
|
||||
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
|
||||
<body pos="0.2 -0.2 0">
|
||||
<joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
|
||||
<geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,96 @@
|
||||
<!-- Cheetah Model
|
||||
|
||||
The state space is populated with joints in the order that they are
|
||||
defined in this file. The actuators also operate on joints.
|
||||
|
||||
State-Space (name/joint/parameter):
|
||||
- rootx slider position (m)
|
||||
- rootz slider position (m)
|
||||
- rooty hinge angle (rad)
|
||||
- bthigh hinge angle (rad)
|
||||
- bshin hinge angle (rad)
|
||||
- bfoot hinge angle (rad)
|
||||
- fthigh hinge angle (rad)
|
||||
- fshin hinge angle (rad)
|
||||
- ffoot hinge angle (rad)
|
||||
- rootx slider velocity (m/s)
|
||||
- rootz slider velocity (m/s)
|
||||
- rooty hinge angular velocity (rad/s)
|
||||
- bthigh hinge angular velocity (rad/s)
|
||||
- bshin hinge angular velocity (rad/s)
|
||||
- bfoot hinge angular velocity (rad/s)
|
||||
- fthigh hinge angular velocity (rad/s)
|
||||
- fshin hinge angular velocity (rad/s)
|
||||
- ffoot hinge angular velocity (rad/s)
|
||||
|
||||
Actuators (name/actuator/parameter):
|
||||
- bthigh hinge torque (N m)
|
||||
- bshin hinge torque (N m)
|
||||
- bfoot hinge torque (N m)
|
||||
- fthigh hinge torque (N m)
|
||||
- fshin hinge torque (N m)
|
||||
- ffoot hinge torque (N m)
|
||||
|
||||
-->
|
||||
<mujoco model="cheetah">
|
||||
<compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
|
||||
<default>
|
||||
<joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
|
||||
<geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1 1"/>
|
||||
</default>
|
||||
<size nstack="300000" nuser_geom="1"/>
|
||||
<option gravity="0 0 -9.81" timestep="0.01"/>
|
||||
<asset>
|
||||
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
|
||||
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
|
||||
<body name="torso" pos="0 0 .7">
|
||||
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
|
||||
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
|
||||
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
|
||||
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
|
||||
<geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
|
||||
<geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
|
||||
<!-- <site name='tip' pos='.15 0 .11'/>-->
|
||||
<body name="bthigh" pos="-.5 0 0">
|
||||
<joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
|
||||
<geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
|
||||
<body name="bshin" pos=".16 0 -.25">
|
||||
<joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
|
||||
<geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
|
||||
<body name="bfoot" pos="-.28 0 -.14">
|
||||
<joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
|
||||
<geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="fthigh" pos=".5 0 0">
|
||||
<joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
|
||||
<geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
|
||||
<body name="fshin" pos="-.14 0 -.24">
|
||||
<joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
|
||||
<geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
|
||||
<body name="ffoot" pos=".13 0 -.18">
|
||||
<joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
|
||||
<geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<motor gear="120" joint="bthigh" name="bthigh"/>
|
||||
<motor gear="90" joint="bshin" name="bshin"/>
|
||||
<motor gear="60" joint="bfoot" name="bfoot"/>
|
||||
<motor gear="120" joint="fthigh" name="fthigh"/>
|
||||
<motor gear="60" joint="fshin" name="fshin"/>
|
||||
<motor gear="30" joint="ffoot" name="ffoot"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,48 @@
|
||||
<mujoco model="hopper">
|
||||
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
|
||||
<default>
|
||||
<joint armature="1" damping="1" limited="true"/>
|
||||
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
|
||||
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
|
||||
</default>
|
||||
<option integrator="RK4" timestep="0.002"/>
|
||||
<visual>
|
||||
<map znear="0.02"/>
|
||||
</visual>
|
||||
<worldbody>
|
||||
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
|
||||
<body name="torso" pos="0 0 1.25">
|
||||
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
|
||||
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
|
||||
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
|
||||
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
|
||||
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
|
||||
<body name="thigh" pos="0 0 1.05">
|
||||
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
|
||||
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
|
||||
<body name="leg" pos="0 0 0.35">
|
||||
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
|
||||
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
|
||||
<body name="foot" pos="0.13/2 0 0.1">
|
||||
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
|
||||
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
|
||||
</actuator>
|
||||
<asset>
|
||||
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
|
||||
width="100" height="100"/>
|
||||
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||
</asset>
|
||||
</mujoco>
|
@ -0,0 +1,121 @@
|
||||
<mujoco model="humanoid">
|
||||
<compiler angle="degree" inertiafromgeom="true"/>
|
||||
<default>
|
||||
<joint armature="1" damping="1" limited="true"/>
|
||||
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
|
||||
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
|
||||
</default>
|
||||
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
|
||||
<!-- <flags solverstat="enable" energy="enable"/>-->
|
||||
</option>
|
||||
<size nkey="5" nuser_geom="1"/>
|
||||
<visual>
|
||||
<map fogend="5" fogstart="3"/>
|
||||
</visual>
|
||||
<asset>
|
||||
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
|
||||
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
|
||||
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
|
||||
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
|
||||
<body name="torso" pos="0 0 1.4">
|
||||
<camera name="track" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1"/>
|
||||
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
|
||||
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
|
||||
<geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258"/>
|
||||
<geom fromto="-.01 -.06 -.12 -.01 .06 -.12" name="uwaist" size="0.06" type="capsule"/>
|
||||
<body name="lwaist" pos="-.01 0 -0.260" quat="1.000 0 -0.002 0">
|
||||
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
|
||||
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
|
||||
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
|
||||
<body name="pelvis" pos="0 0 -0.165" quat="1.000 0 -0.002 0">
|
||||
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
|
||||
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
|
||||
<body name="right_thigh" pos="0 -0.1 -0.04">
|
||||
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
|
||||
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
|
||||
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
|
||||
<geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule"/>
|
||||
<body name="right_shin" pos="0 0.01 -0.403">
|
||||
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
|
||||
<geom fromto="0 0 0 0 0 -.3" name="right_shin1" size="0.049" type="capsule"/>
|
||||
<body name="right_foot" pos="0 0 -0.45">
|
||||
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="left_thigh" pos="0 0.1 -0.04">
|
||||
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
|
||||
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
|
||||
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
|
||||
<geom fromto="0 0 0 0 -0.01 -.34" name="left_thigh1" size="0.06" type="capsule"/>
|
||||
<body name="left_shin" pos="0 -0.01 -0.403">
|
||||
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
|
||||
<geom fromto="0 0 0 0 0 -.3" name="left_shin1" size="0.049" type="capsule"/>
|
||||
<body name="left_foot" pos="0 0 -0.45">
|
||||
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="right_upper_arm" pos="0 -0.17 0.06">
|
||||
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
|
||||
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
|
||||
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
|
||||
<body name="right_lower_arm" pos=".18 -.18 -.18">
|
||||
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
|
||||
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
|
||||
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
|
||||
<camera pos="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
<body name="left_upper_arm" pos="0 0.17 0.06">
|
||||
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
|
||||
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
|
||||
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
|
||||
<body name="left_lower_arm" pos=".18 .18 -.18">
|
||||
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
|
||||
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
|
||||
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<tendon>
|
||||
<fixed name="left_hipknee">
|
||||
<joint coef="-1" joint="left_hip_y"/>
|
||||
<joint coef="1" joint="left_knee"/>
|
||||
</fixed>
|
||||
<fixed name="right_hipknee">
|
||||
<joint coef="-1" joint="right_hip_y"/>
|
||||
<joint coef="1" joint="right_knee"/>
|
||||
</fixed>
|
||||
</tendon>
|
||||
|
||||
<actuator>
|
||||
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
|
||||
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
|
||||
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
|
||||
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
|
||||
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
|
||||
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
|
||||
<motor gear="200" joint="right_knee" name="right_knee"/>
|
||||
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
|
||||
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
|
||||
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
|
||||
<motor gear="200" joint="left_knee" name="left_knee"/>
|
||||
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
|
||||
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
|
||||
<motor gear="25" joint="right_elbow" name="right_elbow"/>
|
||||
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
|
||||
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
|
||||
<motor gear="25" joint="left_elbow" name="left_elbow"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,121 @@
|
||||
<mujoco model="humanoidstandup">
|
||||
<compiler angle="degree" inertiafromgeom="true"/>
|
||||
<default>
|
||||
<joint armature="1" damping="1" limited="true"/>
|
||||
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
|
||||
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
|
||||
</default>
|
||||
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
|
||||
<!-- <flags solverstat="enable" energy="enable"/>-->
|
||||
</option>
|
||||
<size nkey="5" nuser_geom="1"/>
|
||||
<visual>
|
||||
<map fogend="5" fogstart="3"/>
|
||||
</visual>
|
||||
<asset>
|
||||
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
|
||||
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
|
||||
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
|
||||
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
|
||||
<body name="torso" pos="0 0 .105">
|
||||
<camera name="track" mode="trackcom" pos="0 -3 .5" xyaxes="1 0 0 0 0 1"/>
|
||||
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
|
||||
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
|
||||
<geom name="head" pos="-.15 0 0" size=".09" type="sphere" user="258"/>
|
||||
<geom fromto=".11 -.06 0 .11 .06 0" name="uwaist" size="0.06" type="capsule"/>
|
||||
<body name="lwaist" pos=".21 0 0" quat="1.000 0 -0.002 0">
|
||||
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
|
||||
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
|
||||
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
|
||||
<body name="pelvis" pos="0.165 0 0" quat="1.000 0 -0.002 0">
|
||||
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
|
||||
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
|
||||
<body name="right_thigh" pos="0 -0.1 0">
|
||||
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
|
||||
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
|
||||
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
|
||||
<geom fromto="0 0 0 0.34 0.01 0" name="right_thigh1" size="0.06" type="capsule"/>
|
||||
<body name="right_shin" pos="0.403 0.01 0">
|
||||
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
|
||||
<geom fromto="0 0 0 0.3 0 0" name="right_shin1" size="0.049" type="capsule"/>
|
||||
<body name="right_foot" pos="0.35 0 -.10">
|
||||
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="left_thigh" pos="0 0.1 0">
|
||||
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
|
||||
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
|
||||
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
|
||||
<geom fromto="0 0 0 0.34 -0.01 0" name="left_thigh1" size="0.06" type="capsule"/>
|
||||
<body name="left_shin" pos="0.403 -0.01 0">
|
||||
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
|
||||
<geom fromto="0 0 0 0.3 0 0" name="left_shin1" size="0.049" type="capsule"/>
|
||||
<body name="left_foot" pos="0.35 0 -.1">
|
||||
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="right_upper_arm" pos="0 -0.17 0.06">
|
||||
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
|
||||
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
|
||||
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
|
||||
<body name="right_lower_arm" pos=".18 -.18 -.18">
|
||||
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
|
||||
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
|
||||
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
|
||||
<camera pos="0 0 0"/>
|
||||
</body>
|
||||
</body>
|
||||
<body name="left_upper_arm" pos="0 0.17 0.06">
|
||||
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
|
||||
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
|
||||
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
|
||||
<body name="left_lower_arm" pos=".18 .18 -.18">
|
||||
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
|
||||
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
|
||||
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<tendon>
|
||||
<fixed name="left_hipknee">
|
||||
<joint coef="-1" joint="left_hip_y"/>
|
||||
<joint coef="1" joint="left_knee"/>
|
||||
</fixed>
|
||||
<fixed name="right_hipknee">
|
||||
<joint coef="-1" joint="right_hip_y"/>
|
||||
<joint coef="1" joint="right_knee"/>
|
||||
</fixed>
|
||||
</tendon>
|
||||
|
||||
<actuator>
|
||||
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
|
||||
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
|
||||
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
|
||||
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
|
||||
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
|
||||
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
|
||||
<motor gear="200" joint="right_knee" name="right_knee"/>
|
||||
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
|
||||
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
|
||||
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
|
||||
<motor gear="200" joint="left_knee" name="left_knee"/>
|
||||
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
|
||||
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
|
||||
<motor gear="25" joint="right_elbow" name="right_elbow"/>
|
||||
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
|
||||
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
|
||||
<motor gear="25" joint="left_elbow" name="left_elbow"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,47 @@
|
||||
<!-- Cartpole Model
|
||||
|
||||
The state space is populated with joints in the order that they are
|
||||
defined in this file. The actuators also operate on joints.
|
||||
|
||||
State-Space (name/joint/parameter):
|
||||
- cart slider position (m)
|
||||
- pole hinge angle (rad)
|
||||
- cart slider velocity (m/s)
|
||||
- pole hinge angular velocity (rad/s)
|
||||
|
||||
Actuators (name/actuator/parameter):
|
||||
- cart motor force x (N)
|
||||
|
||||
-->
|
||||
<mujoco model="cartpole">
|
||||
<compiler coordinate="local" inertiafromgeom="true"/>
|
||||
<custom>
|
||||
<numeric data="2" name="frame_skip"/>
|
||||
</custom>
|
||||
<default>
|
||||
<joint damping="0.05"/>
|
||||
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
|
||||
</default>
|
||||
<option gravity="1e-5 0 -9.81" integrator="RK4" timestep="0.01"/>
|
||||
<size nstack="3000"/>
|
||||
<worldbody>
|
||||
<geom name="floor" pos="0 0 -3.0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
|
||||
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
|
||||
<body name="cart" pos="0 0 0">
|
||||
<joint axis="1 0 0" limited="true" margin="0.01" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
|
||||
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
|
||||
<body name="pole" pos="0 0 0">
|
||||
<joint axis="0 1 0" name="hinge" pos="0 0 0" type="hinge"/>
|
||||
<geom fromto="0 0 0 0 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
|
||||
<body name="pole2" pos="0 0 0.6">
|
||||
<joint axis="0 1 0" name="hinge2" pos="0 0 0" type="hinge"/>
|
||||
<geom fromto="0 0 0 0 0 0.6" name="cpole2" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
|
||||
<site name="tip" pos="0 0 .6" size="0.01 0.01"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1 1" gear="500" joint="slider" name="slide"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,27 @@
|
||||
<mujoco model="inverted pendulum">
|
||||
<compiler inertiafromgeom="true"/>
|
||||
<default>
|
||||
<joint armature="0" damping="1" limited="true"/>
|
||||
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
|
||||
<tendon/>
|
||||
<motor ctrlrange="-3 3"/>
|
||||
</default>
|
||||
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.02"/>
|
||||
<size nstack="3000"/>
|
||||
<worldbody>
|
||||
<!--geom name="ground" type="plane" pos="0 0 0" /-->
|
||||
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
|
||||
<body name="cart" pos="0 0 0">
|
||||
<joint axis="1 0 0" limited="true" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
|
||||
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
|
||||
<body name="pole" pos="0 0 0">
|
||||
<joint axis="0 1 0" name="hinge" pos="0 0 0" range="-90 90" type="hinge"/>
|
||||
<geom fromto="0 0 0 0.001 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.049 0.3" type="capsule"/>
|
||||
<!-- <body name="pole2" pos="0.001 0 0.6"><joint name="hinge2" type="hinge" pos="0 0 0" axis="0 1 0"/><geom name="cpole2" type="capsule" fromto="0 0 0 0 0 0.6" size="0.05 0.3" rgba="0.7 0 0.7 1"/><site name="tip2" pos="0 0 .6"/></body>-->
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<motor gear="100" joint="slider" name="slide"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,31 @@
|
||||
<mujoco>
|
||||
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
|
||||
<option integrator="RK4" timestep="0.02"/>
|
||||
<default>
|
||||
<joint armature="0" damping="0" limited="false"/>
|
||||
<geom conaffinity="0" condim="3" density="100" friction="1 0.5 0.5" margin="0" rgba="0.8 0.6 0.4 1"/>
|
||||
</default>
|
||||
<asset>
|
||||
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
|
||||
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
|
||||
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
|
||||
<body name="torso" pos="0 0 0">
|
||||
<geom name="pointbody" pos="0 0 0.5" size="0.5" type="sphere"/>
|
||||
<geom name="pointarrow" pos="0.6 0 0.5" size="0.5 0.1 0.1" type="box"/>
|
||||
<joint axis="1 0 0" name="ballx" pos="0 0 0" type="slide"/>
|
||||
<joint axis="0 1 0" name="bally" pos="0 0 0" type="slide"/>
|
||||
<joint axis="0 0 1" limited="false" name="rot" pos="0 0 0" type="hinge"/>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<!-- Those are just dummy actuators for providing ranges -->
|
||||
<motor ctrllimited="true" ctrlrange="-1 1" joint="ballx"/>
|
||||
<motor ctrllimited="true" ctrlrange="-0.25 0.25" joint="rot"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,91 @@
|
||||
<mujoco model="arm3d">
|
||||
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
|
||||
<option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler" />
|
||||
|
||||
<default>
|
||||
<joint armature='0.04' damping="1" limited="true"/>
|
||||
<geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
|
||||
</default>
|
||||
|
||||
<worldbody>
|
||||
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
|
||||
<geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
|
||||
|
||||
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
|
||||
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" />
|
||||
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" />
|
||||
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" />
|
||||
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" />
|
||||
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
|
||||
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
|
||||
|
||||
<body name="r_shoulder_lift_link" pos="0.1 0 0">
|
||||
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
|
||||
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
|
||||
|
||||
<body name="r_upper_arm_roll_link" pos="0 0 0">
|
||||
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
|
||||
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
|
||||
|
||||
<body name="r_upper_arm_link" pos="0 0 0">
|
||||
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
|
||||
|
||||
<body name="r_elbow_flex_link" pos="0.4 0 0">
|
||||
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
|
||||
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
|
||||
|
||||
<body name="r_forearm_roll_link" pos="0 0 0">
|
||||
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
|
||||
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
|
||||
|
||||
<body name="r_forearm_link" pos="0 0 0">
|
||||
<geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
|
||||
|
||||
<body name="r_wrist_flex_link" pos="0.321 0 0">
|
||||
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
|
||||
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
|
||||
|
||||
<body name="r_wrist_roll_link" pos="0 0 0">
|
||||
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
|
||||
<body name="tips_arm" pos="0 0 0">
|
||||
<geom name="tip_arml" type="sphere" pos="0.1 -0.1 0." size="0.01" />
|
||||
<geom name="tip_armr" type="sphere" pos="0.1 0.1 0." size="0.01" />
|
||||
</body>
|
||||
<geom type="capsule" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" contype="1" conaffinity="1" />
|
||||
<geom type="capsule" fromto="0 -0.1 0. 0.1 -0.1 0" size="0.02" contype="1" conaffinity="1" />
|
||||
<geom type="capsule" fromto="0 +0.1 0. 0.1 +0.1 0." size="0.02" contype="1" conaffinity="1" />
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
|
||||
<!--<body name="object" pos="0.55 -0.3 -0.275" >-->
|
||||
<body name="object" pos="0.45 -0.05 -0.275" >
|
||||
<geom rgba="1 1 1 0" type="sphere" size="0.05 0.05 0.05" density="0.00001" conaffinity="0"/>
|
||||
<geom rgba="1 1 1 1" type="cylinder" size="0.05 0.05 0.05" density="0.00001" contype="1" conaffinity="0"/>
|
||||
<joint name="obj_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
|
||||
<joint name="obj_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
|
||||
</body>
|
||||
|
||||
<body name="goal" pos="0.45 -0.05 -0.3230">
|
||||
<geom rgba="1 0 0 1" type="cylinder" size="0.08 0.001 0.1" density='0.00001' contype="0" conaffinity="0"/>
|
||||
<joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
|
||||
<joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
<actuator>
|
||||
<motor joint="r_shoulder_pan_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
|
||||
<motor joint="r_shoulder_lift_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
|
||||
<motor joint="r_upper_arm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
|
||||
<motor joint="r_elbow_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
|
||||
<motor joint="r_forearm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
|
||||
<motor joint="r_wrist_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
|
||||
<motor joint="r_wrist_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,39 @@
|
||||
<mujoco model="reacher">
|
||||
<compiler angle="radian" inertiafromgeom="true"/>
|
||||
<default>
|
||||
<joint armature="1" damping="1" limited="true"/>
|
||||
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
|
||||
</default>
|
||||
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.01"/>
|
||||
<worldbody>
|
||||
<!-- Arena -->
|
||||
<geom conaffinity="0" contype="0" name="ground" pos="0 0 0" rgba="0.9 0.9 0.9 1" size="1 1 10" type="plane"/>
|
||||
<geom conaffinity="0" fromto="-.3 -.3 .01 .3 -.3 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
|
||||
<geom conaffinity="0" fromto=" .3 -.3 .01 .3 .3 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
|
||||
<geom conaffinity="0" fromto="-.3 .3 .01 .3 .3 .01" name="sideN" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
|
||||
<geom conaffinity="0" fromto="-.3 -.3 .01 -.3 .3 .01" name="sideW" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
|
||||
<!-- Arm -->
|
||||
<geom conaffinity="0" contype="0" fromto="0 0 0 0 0 0.02" name="root" rgba="0.9 0.4 0.6 1" size=".011" type="cylinder"/>
|
||||
<body name="body0" pos="0 0 .01">
|
||||
<geom fromto="0 0 0 0.1 0 0" name="link0" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
|
||||
<joint axis="0 0 1" limited="false" name="joint0" pos="0 0 0" type="hinge"/>
|
||||
<body name="body1" pos="0.1 0 0">
|
||||
<joint axis="0 0 1" limited="true" name="joint1" pos="0 0 0" range="-3.0 3.0" type="hinge"/>
|
||||
<geom fromto="0 0 0 0.1 0 0" name="link1" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
|
||||
<body name="fingertip" pos="0.11 0 0">
|
||||
<geom contype="0" name="fingertip" pos="0 0 0" rgba="0.0 0.8 0.6 1" size=".01" type="sphere"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<!-- Target -->
|
||||
<body name="target" pos=".1 -.1 .01">
|
||||
<joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>
|
||||
<joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>
|
||||
<geom conaffinity="0" contype="0" name="target" pos="0 0 0" rgba="0.9 0.2 0.2 1" size=".009" type="sphere"/>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint0"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint1"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,101 @@
|
||||
<mujoco model="arm3d">
|
||||
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
|
||||
<option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler"/>
|
||||
|
||||
<default>
|
||||
<joint armature='0.04' damping="1" limited="true"/>
|
||||
<geom friction=".0 .0 .0" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
|
||||
</default>
|
||||
|
||||
<worldbody>
|
||||
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
|
||||
<geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
|
||||
|
||||
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
|
||||
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05"/>
|
||||
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05"/>
|
||||
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03"/>
|
||||
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03"/>
|
||||
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
|
||||
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
|
||||
|
||||
<body name="r_shoulder_lift_link" pos="0.1 0 0">
|
||||
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
|
||||
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
|
||||
|
||||
<body name="r_upper_arm_roll_link" pos="0 0 0">
|
||||
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
|
||||
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
|
||||
|
||||
<body name="r_upper_arm_link" pos="0 0 0">
|
||||
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
|
||||
|
||||
<body name="r_elbow_flex_link" pos="0.4 0 0">
|
||||
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
|
||||
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
|
||||
|
||||
<body name="r_forearm_roll_link" pos="0 0 0">
|
||||
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
|
||||
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
|
||||
|
||||
<body name="r_forearm_link" pos="0 0 0">
|
||||
<geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
|
||||
|
||||
<body name="r_wrist_flex_link" pos="0.321 0 0">
|
||||
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
|
||||
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
|
||||
|
||||
<body name="r_wrist_roll_link" pos="0 0 0">
|
||||
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
|
||||
|
||||
|
||||
<body name="tips_arm" pos="0 0 0">
|
||||
<geom conaffinity="1" contype="1" name="tip_arml" pos="0.017 0 0" size="0.003 0.12 0.05" type="box" />
|
||||
|
||||
</body>
|
||||
<geom conaffinity="1" contype="1" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" type="capsule" />
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
|
||||
<body name="object" pos="0 0 -0.270" >
|
||||
<geom type="sphere" rgba="1 1 1 1" pos="0 0 0" size="0.05 0.05 0.05" contype="1" conaffinity="0"/>
|
||||
<joint name="obj_slidey" armature="0.1" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
|
||||
<joint name="obj_slidex" armature="0.1" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
|
||||
</body>
|
||||
|
||||
<body name="goal" pos="0.0 0.0 -0.3230">
|
||||
<geom rgba="1. 1. 1. 0" pos="0 0 0" type="box" size="0.01 0.01 0.01" contype="0" conaffinity="0"/>
|
||||
<body pos="0 0 0">
|
||||
<geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
|
||||
</body>
|
||||
<body name="coaster" pos="0 0 0">
|
||||
<geom rgba="1. 1. 1. 1" type="cylinder" size="0.08 0.001 0.1" density='1000000' contype="0" conaffinity="0"/>
|
||||
</body>
|
||||
<body pos="0 0 0" axisangle="0 0 1 0.785">
|
||||
<geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
|
||||
</body>
|
||||
<body pos="0 0 0" axisangle="0 0 1 -0.785">
|
||||
<geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
|
||||
</body>
|
||||
<joint name="goal_free" type="free" pos="0 0 0" limited="false" damping="0"/>
|
||||
</body>
|
||||
</worldbody>
|
||||
|
||||
<actuator>
|
||||
<motor joint="r_shoulder_pan_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
|
||||
<motor joint="r_shoulder_lift_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
|
||||
<motor joint="r_upper_arm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
|
||||
<motor joint="r_elbow_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
|
||||
<motor joint="r_forearm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
|
||||
<motor joint="r_wrist_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
|
||||
<motor joint="r_wrist_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true"/>
|
||||
</actuator>
|
||||
|
||||
</mujoco>
|
@ -0,0 +1,38 @@
|
||||
<mujoco model="swimmer">
|
||||
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
|
||||
<option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1"/>
|
||||
<default>
|
||||
<geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1"/>
|
||||
<joint armature='0.1' />
|
||||
</default>
|
||||
<asset>
|
||||
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
|
||||
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
|
||||
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane"/>
|
||||
<!-- ================= SWIMMER ================= /-->
|
||||
<body name="torso" pos="0 0 0">
|
||||
<geom density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule"/>
|
||||
<joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide"/>
|
||||
<joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide"/>
|
||||
<joint axis="0 0 1" name="rot" pos="0 0 0" type="hinge"/>
|
||||
<body name="mid" pos="0.5 0 0">
|
||||
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
|
||||
<joint axis="0 0 1" limited="true" name="rot2" pos="0 0 0" range="-100 100" type="hinge"/>
|
||||
<body name="back" pos="-1 0 0">
|
||||
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
|
||||
<joint axis="0 0 1" limited="true" name="rot3" pos="0 0 0" range="-100 100" type="hinge"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot2"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot3"/>
|
||||
</actuator>
|
||||
</mujoco>
|
@ -0,0 +1,127 @@
|
||||
<mujoco model="arm3d">
|
||||
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
|
||||
<option timestep="0.01" gravity="0 0 -9.81" iterations="20" integrator="Euler"/>
|
||||
<default>
|
||||
<joint armature='0.75' damping="1" limited="true"/>
|
||||
<geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
|
||||
</default>
|
||||
|
||||
<worldbody>
|
||||
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
|
||||
|
||||
<geom type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
|
||||
|
||||
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
|
||||
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" density="0.0001"/>
|
||||
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" density="0.0001"/>
|
||||
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" density="0.0001"/>
|
||||
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" density="0.0001"/>
|
||||
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" density="1"/>
|
||||
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-0.4854 1.214602" damping="1.0"/>
|
||||
|
||||
<body name="r_shoulder_lift_link" pos="0.1 0 0">
|
||||
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" density="0.0001"/>
|
||||
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 0.7963" damping="1.0"/>
|
||||
|
||||
<body name="r_upper_arm_roll_link" pos="0 0 0">
|
||||
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
|
||||
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="1.0"/>
|
||||
|
||||
<body name="r_upper_arm_link" pos="0 0 0">
|
||||
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" density="0.0001"/>
|
||||
|
||||
<body name="r_elbow_flex_link" pos="0.4 0 0">
|
||||
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" density="0.0001"/>
|
||||
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.7 0.7" damping="1.0"/>
|
||||
|
||||
<body name="r_forearm_roll_link" pos="0 0 0">
|
||||
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
|
||||
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping="1.0" range="-1.5 1.5"/>
|
||||
|
||||
<body name="r_forearm_link" pos="0 0 0" axisangle="1 0 0 0.392">
|
||||
<geom name="fa" type="capsule" fromto="0 0 0 0 0 0.291" size="0.05" density="0.0001"/>
|
||||
|
||||
<body name="r_wrist_flex_link" pos="0 0 0.321" axisangle="0 0 1 1.57">
|
||||
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" density="0.0001"/>
|
||||
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-1.0 1.0" damping="1.0" />
|
||||
|
||||
<body name="r_wrist_roll_link" pos="0 0 0" axisangle="0 1 0 -1.178">
|
||||
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="0 1 0" damping="1.0" range="0 2.25"/>
|
||||
<geom type="capsule" fromto="0 -0.05 0 0 0.05 0" size="0.01" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<body pos="0 0 0" axisangle="0 0 1 0.392">
|
||||
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density=".0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
</body>
|
||||
<body pos="0 0 0" axisangle="0 0 1 1.57">
|
||||
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
</body>
|
||||
<body pos="0 0 0" axisangle="0 0 1 1.178">
|
||||
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
</body>
|
||||
<body pos="0 0 0" axisangle="0 0 1 0.785">
|
||||
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
</body>
|
||||
<body pos="0 0 0" axisangle="0 0 1 1.96">
|
||||
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
</body>
|
||||
<body pos="0 0 0" axisangle="0 0 1 2.355">
|
||||
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
</body>
|
||||
<body pos="0 0 0" axisangle="0 0 1 2.74">
|
||||
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
|
||||
<body name="goal" pos="0.575 0.5 -0.328">
|
||||
<geom rgba="1 1 1 1" type="box" pos="0 0 0.005" size="0.075 0.075 0.001" contype="1" conaffinity="1" density="1000"/>
|
||||
<geom rgba="1 1 1 1" type="box" pos="0.0 0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
|
||||
<geom rgba="1 1 1 1" type="box" pos="0.0 -0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
|
||||
<geom rgba="1 1 1 1" type="box" pos="0.075 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
|
||||
<geom rgba="1 1 1 1" type="box" pos="-0.076 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
|
||||
<geom rgba="1 1 1 1" type="capsule" fromto="0.073 0.073 0.0075 0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
|
||||
<geom rgba="1 1 1 1" type="capsule" fromto="0.073 -0.073 0.0075 0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
|
||||
<geom rgba="1 1 1 1" type="capsule" fromto="-0.073 0.073 0.0075 -0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
|
||||
<geom rgba="1 1 1 1" type="capsule" fromto="-0.073 -0.073 0.0075 -0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
|
||||
<joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="1.0"/>
|
||||
<joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="1.0"/>
|
||||
</body>
|
||||
|
||||
<body name="ball" pos="0.5 -0.8 0.275">
|
||||
<geom rgba="1. 1. 1. 1" type="sphere" size="0.03 0.03 0.1" density='25' contype="1" conaffinity="1"/>
|
||||
<joint name="ball_free" type="free" armature='0' damping="0" limited="false"/>
|
||||
</body>
|
||||
|
||||
</worldbody>
|
||||
|
||||
<actuator>
|
||||
<motor joint="r_shoulder_pan_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
|
||||
<motor joint="r_shoulder_lift_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
|
||||
<motor joint="r_upper_arm_roll_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
|
||||
<motor joint="r_elbow_flex_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
|
||||
<motor joint="r_forearm_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
|
||||
<motor joint="r_wrist_flex_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
|
||||
<motor joint="r_wrist_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true"/>
|
||||
</actuator>
|
||||
|
||||
</mujoco>
|
@ -0,0 +1,62 @@
|
||||
<mujoco model="walker2d">
|
||||
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
|
||||
<default>
|
||||
<joint armature="0.01" damping=".1" limited="true"/>
|
||||
<geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
|
||||
</default>
|
||||
<option integrator="RK4" timestep="0.002"/>
|
||||
<worldbody>
|
||||
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
|
||||
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
|
||||
<body name="torso" pos="0 0 1.25">
|
||||
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
|
||||
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
|
||||
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
|
||||
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
|
||||
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
|
||||
<body name="thigh" pos="0 0 1.05">
|
||||
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
|
||||
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
|
||||
<body name="leg" pos="0 0 0.35">
|
||||
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
|
||||
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
|
||||
<body name="foot" pos="0.2/2 0 0.1">
|
||||
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
|
||||
<geom friction="0.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_left -->
|
||||
<body name="thigh_left" pos="0 0 1.05">
|
||||
<joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
|
||||
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.05" type="capsule"/>
|
||||
<body name="leg_left" pos="0 0 0.35">
|
||||
<joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
|
||||
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.04" type="capsule"/>
|
||||
<body name="foot_left" pos="0.2/2 0 0.1">
|
||||
<joint axis="0 -1 0" name="foot_left_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
|
||||
<geom friction="1.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.06" type="capsule"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
|
||||
<!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
|
||||
</actuator>
|
||||
<asset>
|
||||
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0" width="100" height="100"/>
|
||||
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
|
||||
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
|
||||
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
|
||||
<material name="geom" texture="texgeom" texuniform="true"/>
|
||||
</asset>
|
||||
</mujoco>
|
@ -0,0 +1,34 @@
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
def step(self, action):
|
||||
xposbefore = self.sim.data.qpos[0]
|
||||
self.do_simulation(action, self.frame_skip)
|
||||
xposafter = self.sim.data.qpos[0]
|
||||
ob = self._get_obs()
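        # reward: forward progress per unit time minus a 0.1-weighted quadratic control cost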
|
||||
reward_ctrl = - 0.1 * np.square(action).sum()
|
||||
reward_run = (xposafter - xposbefore)/self.dt
|
||||
reward = reward_ctrl + reward_run
|
||||
done = False
|
||||
return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
|
||||
|
||||
def _get_obs(self):
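        # observation: all joint positions except the root x coordinate, plus all joint velocities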
|
||||
return np.concatenate([
|
||||
self.sim.data.qpos.flat[1:],
|
||||
self.sim.data.qvel.flat,
|
||||
])
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
|
||||
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.distance = self.model.stat.extent * 0.5
|
@ -0,0 +1,40 @@
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
def step(self, a):
|
||||
posbefore = self.sim.data.qpos[0]
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
posafter, height, ang = self.sim.data.qpos[0:3]
|
||||
alive_bonus = 1.0
|
||||
reward = (posafter - posbefore) / self.dt
|
||||
reward += alive_bonus
|
||||
reward -= 1e-3 * np.square(a).sum()
|
||||
s = self.state_vector()
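        # healthy check: finite state, bounded values, torso height above 0.7 and pitch angle within 0.2 rad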
|
||||
done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
|
||||
(height > .7) and (abs(ang) < .2))
|
||||
ob = self._get_obs()
|
||||
return ob, reward, done, {}
|
||||
|
||||
def _get_obs(self):
|
||||
return np.concatenate([
|
||||
self.sim.data.qpos.flat[1:],
|
||||
np.clip(self.sim.data.qvel.flat, -10, 10)
|
||||
])
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
|
||||
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.trackbodyid = 2
|
||||
self.viewer.cam.distance = self.model.stat.extent * 0.75
|
||||
self.viewer.cam.lookat[2] = 1.15
|
||||
self.viewer.cam.elevation = -20
|
@ -0,0 +1,51 @@
|
||||
import numpy as np
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
from gym import utils
|
||||
|
||||
def mass_center(model, sim):
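    # mass-weighted mean of the per-body centre-of-mass positions; only the x component is used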
|
||||
mass = np.expand_dims(model.body_mass, 1)
|
||||
xpos = sim.data.xipos
|
||||
return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
|
||||
|
||||
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
def _get_obs(self):
|
||||
data = self.sim.data
|
||||
return np.concatenate([data.qpos.flat[2:],
|
||||
data.qvel.flat,
|
||||
data.cinert.flat,
|
||||
data.cvel.flat,
|
||||
data.qfrc_actuator.flat,
|
||||
data.cfrc_ext.flat])
|
||||
|
||||
def step(self, a):
|
||||
pos_before = mass_center(self.model, self.sim)
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
pos_after = mass_center(self.model, self.sim)
|
||||
alive_bonus = 5.0
|
||||
data = self.sim.data
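        # reward: scaled forward velocity of the centre of mass, minus quadratic control and capped impact costs, plus the alive bonus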
|
||||
lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
|
||||
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
|
||||
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
|
||||
quad_impact_cost = min(quad_impact_cost, 10)
|
||||
reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
|
||||
qpos = self.sim.data.qpos
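        # terminate when the torso height leaves the [1.0, 2.0] band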
|
||||
done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
|
||||
return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
|
||||
|
||||
def reset_model(self):
|
||||
c = 0.01
|
||||
self.set_state(
|
||||
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
|
||||
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
|
||||
)
|
||||
return self._get_obs()
|
||||
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.trackbodyid = 1
|
||||
self.viewer.cam.distance = self.model.stat.extent * 1.0
|
||||
self.viewer.cam.lookat[2] = 2.0
|
||||
self.viewer.cam.elevation = -20
|
@ -0,0 +1,45 @@
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
from gym import utils
|
||||
import numpy as np
|
||||
|
||||
class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
def _get_obs(self):
|
||||
data = self.sim.data
|
||||
return np.concatenate([data.qpos.flat[2:],
|
||||
data.qvel.flat,
|
||||
data.cinert.flat,
|
||||
data.cvel.flat,
|
||||
data.qfrc_actuator.flat,
|
||||
data.cfrc_ext.flat])
|
||||
|
||||
def step(self, a):
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
pos_after = self.sim.data.qpos[2]
|
||||
data = self.sim.data
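        # uph_cost: current torso height divided by the simulation timestep, so standing taller yields more reward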
|
||||
uph_cost = (pos_after - 0) / self.model.opt.timestep
|
||||
|
||||
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
|
||||
quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
|
||||
quad_impact_cost = min(quad_impact_cost, 10)
|
||||
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
|
||||
|
||||
done = bool(False)
|
||||
return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)
|
||||
|
||||
def reset_model(self):
|
||||
c = 0.01
|
||||
self.set_state(
|
||||
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
|
||||
self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
|
||||
)
|
||||
return self._get_obs()
|
||||
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.trackbodyid = 1
|
||||
self.viewer.cam.distance = self.model.stat.extent * 1.0
|
||||
self.viewer.cam.lookat[2] = 0.8925
|
||||
self.viewer.cam.elevation = -20
|
@ -0,0 +1,43 @@
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
|
||||
def __init__(self):
|
||||
mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
def step(self, action):
|
||||
self.do_simulation(action, self.frame_skip)
|
||||
ob = self._get_obs()
|
||||
x, _, y = self.sim.data.site_xpos[0]
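        # penalise the tip's offset from the upright target (x = 0, y = 2) and the joint velocities; fail once the tip drops to y <= 1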
|
||||
dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
|
||||
v1, v2 = self.sim.data.qvel[1:3]
|
||||
vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
|
||||
alive_bonus = 10
|
||||
r = alive_bonus - dist_penalty - vel_penalty
|
||||
done = bool(y <= 1)
|
||||
return ob, r, done, {}
|
||||
|
||||
def _get_obs(self):
|
||||
return np.concatenate([
|
||||
self.sim.data.qpos[:1], # cart x pos
|
||||
np.sin(self.sim.data.qpos[1:]), # link angles
|
||||
np.cos(self.sim.data.qpos[1:]),
|
||||
np.clip(self.sim.data.qvel, -10, 10),
|
||||
np.clip(self.sim.data.qfrc_constraint, -10, 10)
|
||||
]).ravel()
|
||||
|
||||
def reset_model(self):
|
||||
self.set_state(
|
||||
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
|
||||
self.init_qvel + self.np_random.randn(self.model.nv) * .1
|
||||
)
|
||||
return self._get_obs()
|
||||
|
||||
def viewer_setup(self):
|
||||
v = self.viewer
|
||||
v.cam.trackbodyid = 0
|
||||
v.cam.distance = self.model.stat.extent * 0.5
|
||||
v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]
|
@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
utils.EzPickle.__init__(self)
|
||||
mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
|
||||
|
||||
def step(self, a):
|
||||
reward = 1.0
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
ob = self._get_obs()
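        # constant reward of 1 per step; episode ends when the observation is non-finite or the pole angle exceeds 0.2 rad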
|
||||
notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
|
||||
done = not notdone
|
||||
return ob, reward, done, {}
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
|
||||
qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
def _get_obs(self):
|
||||
return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
|
||||
|
||||
def viewer_setup(self):
|
||||
v = self.viewer
|
||||
v.cam.trackbodyid = 0
|
||||
v.cam.distance = self.model.stat.extent
|
@ -0,0 +1,145 @@
|
||||
import os
|
||||
|
||||
from gym import error, spaces
|
||||
from gym.utils import seeding
|
||||
import numpy as np
|
||||
from os import path
|
||||
import gym
|
||||
import six
|
||||
|
||||
try:
|
||||
import mujoco_py
|
||||
except ImportError as e:
|
||||
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
|
||||
|
||||
DEFAULT_SIZE = 500
|
||||
|
||||
class MujocoEnv(gym.Env):
|
||||
"""Superclass for all MuJoCo environments.
|
||||
"""
|
||||
|
||||
def __init__(self, model_path, frame_skip):
|
||||
if model_path.startswith("/"):
|
||||
fullpath = model_path
|
||||
else:
|
||||
fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
|
||||
if not path.exists(fullpath):
|
||||
raise IOError("File %s does not exist" % fullpath)
|
||||
self.frame_skip = frame_skip
|
||||
self.model = mujoco_py.load_model_from_path(fullpath)
|
||||
self.sim = mujoco_py.MjSim(self.model)
|
||||
self.data = self.sim.data
|
||||
self.viewer = None
|
||||
self._viewers = {}
|
||||
|
||||
self.metadata = {
|
||||
'render.modes': ['human', 'rgb_array', 'depth_array'],
|
||||
'video.frames_per_second': int(np.round(1.0 / self.dt))
|
||||
}
|
||||
|
||||
self.init_qpos = self.sim.data.qpos.ravel().copy()
|
||||
self.init_qvel = self.sim.data.qvel.ravel().copy()
|
||||
observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
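        # probe with one zero-action step to determine the observation dimensionality before building the spaces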
|
||||
assert not done
|
||||
self.obs_dim = observation.size
|
||||
|
||||
bounds = self.model.actuator_ctrlrange.copy()
|
||||
low = bounds[:, 0]
|
||||
high = bounds[:, 1]
|
||||
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
|
||||
|
||||
high = np.inf*np.ones(self.obs_dim)
|
||||
low = -high
|
||||
self.observation_space = spaces.Box(low, high, dtype=np.float32)
|
||||
|
||||
self.seed()
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
# methods to override:
|
||||
# ----------------------------
|
||||
|
||||
def reset_model(self):
|
||||
"""
|
||||
Reset the robot degrees of freedom (qpos and qvel).
|
||||
Implement this in each subclass.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def viewer_setup(self):
|
||||
"""
|
||||
This method is called when the viewer is initialized.
|
||||
Optionally implement this method, if you need to tinker with camera position
|
||||
and so forth.
|
||||
"""
|
||||
pass
|
||||
|
||||
# -----------------------------
|
||||
|
||||
def reset(self):
|
||||
self.sim.reset()
|
||||
ob = self.reset_model()
|
||||
return ob
|
||||
|
||||
def set_state(self, qpos, qvel):
|
||||
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
|
||||
old_state = self.sim.get_state()
|
||||
new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
|
||||
old_state.act, old_state.udd_state)
|
||||
self.sim.set_state(new_state)
|
||||
self.sim.forward()
|
||||
|
||||
@property
|
||||
def dt(self):
|
||||
return self.model.opt.timestep * self.frame_skip
|
||||
|
||||
def do_simulation(self, ctrl, n_frames):
|
||||
self.sim.data.ctrl[:] = ctrl
|
||||
for _ in range(n_frames):
|
||||
self.sim.step()
|
||||
|
||||
def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
|
||||
if mode == 'rgb_array':
|
||||
self._get_viewer(mode).render(width, height)
|
||||
# window size used for old mujoco-py:
|
||||
data = self._get_viewer(mode).read_pixels(width, height, depth=False)
|
||||
# original image is upside-down, so flip it
|
||||
return data[::-1, :, :]
|
||||
elif mode == 'depth_array':
|
||||
self._get_viewer(mode).render(width, height)
|
||||
# window size used for old mujoco-py:
|
||||
# Extract depth part of the read_pixels() tuple
|
||||
data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
|
||||
# original image is upside-down, so flip it
|
||||
return data[::-1, :]
|
||||
elif mode == 'human':
|
||||
self._get_viewer(mode).render()
|
||||
|
||||
def close(self):
|
||||
if self.viewer is not None:
|
||||
# self.viewer.finish()
|
||||
self.viewer = None
|
||||
self._viewers = {}
|
||||
|
||||
def _get_viewer(self, mode):
|
||||
self.viewer = self._viewers.get(mode)
|
||||
if self.viewer is None:
|
||||
if mode == 'human':
|
||||
self.viewer = mujoco_py.MjViewer(self.sim)
|
||||
elif mode == 'rgb_array' or mode == 'depth_array':
|
||||
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
|
||||
|
||||
self.viewer_setup()
|
||||
self._viewers[mode] = self.viewer
|
||||
return self.viewer
|
||||
|
||||
def get_body_com(self, body_name):
|
||||
return self.data.get_body_xpos(body_name)
|
||||
|
||||
def state_vector(self):
|
||||
return np.concatenate([
|
||||
self.sim.data.qpos.flat,
|
||||
self.sim.data.qvel.flat
|
||||
])
|
@ -0,0 +1,57 @@
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
import mujoco_py
|
||||
|
||||
class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
utils.EzPickle.__init__(self)
|
||||
mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)
|
||||
|
||||
def step(self, a):
|
||||
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
|
||||
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
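        # vec_1: object relative to the arm tips, vec_2: object relative to the goal; the reward favours small distances and small controls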
|
||||
|
||||
reward_near = - np.linalg.norm(vec_1)
|
||||
reward_dist = - np.linalg.norm(vec_2)
|
||||
reward_ctrl = - np.square(a).sum()
|
||||
reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
|
||||
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
ob = self._get_obs()
|
||||
done = False
|
||||
return ob, reward, done, dict(reward_dist=reward_dist,
|
||||
reward_ctrl=reward_ctrl)
|
||||
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.trackbodyid = -1
|
||||
self.viewer.cam.distance = 4.0
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.init_qpos
|
||||
|
||||
self.goal_pos = np.asarray([0, 0])
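        # resample the cylinder's start position until it is at least 0.17 away from the goal at the origin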
|
||||
while True:
|
||||
self.cylinder_pos = np.concatenate([
|
||||
self.np_random.uniform(low=-0.3, high=0, size=1),
|
||||
self.np_random.uniform(low=-0.2, high=0.2, size=1)])
|
||||
if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
|
||||
break
|
||||
|
||||
qpos[-4:-2] = self.cylinder_pos
|
||||
qpos[-2:] = self.goal_pos
|
||||
qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
|
||||
high=0.005, size=self.model.nv)
|
||||
qvel[-4:] = 0
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
def _get_obs(self):
|
||||
return np.concatenate([
|
||||
self.sim.data.qpos.flat[:7],
|
||||
self.sim.data.qvel.flat[:7],
|
||||
self.get_body_com("tips_arm"),
|
||||
self.get_body_com("object"),
|
||||
self.get_body_com("goal"),
|
||||
])
|
@ -0,0 +1,43 @@
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
utils.EzPickle.__init__(self)
|
||||
mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)
|
||||
|
||||
def step(self, a):
|
||||
vec = self.get_body_com("fingertip")-self.get_body_com("target")
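        # reward: negative fingertip-to-target distance plus a negative quadratic control cost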
|
||||
reward_dist = - np.linalg.norm(vec)
|
||||
reward_ctrl = - np.square(a).sum()
|
||||
reward = reward_dist + reward_ctrl
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
ob = self._get_obs()
|
||||
done = False
|
||||
return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
|
||||
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.trackbodyid = 0
|
||||
|
||||
def reset_model(self):
|
||||
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
|
||||
while True:
|
||||
self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
|
||||
if np.linalg.norm(self.goal) < 2:
|
||||
break
|
||||
qpos[-2:] = self.goal
|
||||
qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
|
||||
qvel[-2:] = 0
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
def _get_obs(self):
|
||||
theta = self.sim.data.qpos.flat[:2]
|
||||
return np.concatenate([
|
||||
np.cos(theta),
|
||||
np.sin(theta),
|
||||
self.sim.data.qpos.flat[2:],
|
||||
self.sim.data.qvel.flat[:2],
|
||||
self.get_body_com("fingertip") - self.get_body_com("target")
|
||||
])
|
@ -0,0 +1,75 @@
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
utils.EzPickle.__init__(self)
|
||||
self._striked = False
|
||||
self._min_strike_dist = np.inf
|
||||
self.strike_threshold = 0.1
|
||||
mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5)
|
||||
|
||||
def step(self, a):
|
||||
vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
|
||||
vec_2 = self.get_body_com("object") - self.get_body_com("goal")
|
||||
self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2))
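        # after the first contact within strike_threshold, the "near" reward is measured from the recorded strike position rather than the moving arm tips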
|
||||
|
||||
if np.linalg.norm(vec_1) < self.strike_threshold:
|
||||
self._striked = True
|
||||
self._strike_pos = self.get_body_com("tips_arm")
|
||||
|
||||
if self._striked:
|
||||
vec_3 = self.get_body_com("object") - self._strike_pos
|
||||
reward_near = - np.linalg.norm(vec_3)
|
||||
else:
|
||||
reward_near = - np.linalg.norm(vec_1)
|
||||
|
||||
reward_dist = - np.linalg.norm(self._min_strike_dist)
|
||||
reward_ctrl = - np.square(a).sum()
|
||||
reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
|
||||
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
ob = self._get_obs()
|
||||
done = False
|
||||
return ob, reward, done, dict(reward_dist=reward_dist,
|
||||
reward_ctrl=reward_ctrl)
|
||||
|
||||
def viewer_setup(self):
|
||||
self.viewer.cam.trackbodyid = 0
|
||||
self.viewer.cam.distance = 4.0
|
||||
|
||||
def reset_model(self):
|
||||
self._min_strike_dist = np.inf
|
||||
self._striked = False
|
||||
self._strike_pos = None
|
||||
|
||||
qpos = self.init_qpos
|
||||
|
||||
self.ball = np.array([0.5, -0.175])
|
||||
while True:
|
||||
self.goal = np.concatenate([
|
||||
self.np_random.uniform(low=0.15, high=0.7, size=1),
|
||||
self.np_random.uniform(low=0.1, high=1.0, size=1)])
|
||||
if np.linalg.norm(self.ball - self.goal) > 0.17:
|
||||
break
|
||||
|
||||
qpos[-9:-7] = [self.ball[1], self.ball[0]]
|
||||
qpos[-7:-5] = self.goal
|
||||
diff = self.ball - self.goal
|
||||
angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
|
||||
qpos[-1] = angle / 3.14
|
||||
qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
|
||||
size=self.model.nv)
|
||||
qvel[7:] = 0
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
def _get_obs(self):
|
||||
return np.concatenate([
|
||||
self.sim.data.qpos.flat[:7],
|
||||
self.sim.data.qvel.flat[:7],
|
||||
self.get_body_com("tips_arm"),
|
||||
self.get_body_com("object"),
|
||||
self.get_body_com("goal"),
|
||||
])
|
@ -0,0 +1,31 @@
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
def __init__(self):
|
||||
mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
def step(self, a):
|
||||
ctrl_cost_coeff = 0.0001
|
||||
xposbefore = self.sim.data.qpos[0]
|
||||
self.do_simulation(a, self.frame_skip)
|
||||
xposafter = self.sim.data.qpos[0]
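        # reward: forward displacement per unit time minus a tiny quadratic control cost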
|
||||
reward_fwd = (xposafter - xposbefore) / self.dt
|
||||
reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
|
||||
reward = reward_fwd + reward_ctrl
|
||||
ob = self._get_obs()
|
||||
return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
|
||||
|
||||
def _get_obs(self):
|
||||
qpos = self.sim.data.qpos
|
||||
qvel = self.sim.data.qvel
|
||||
return np.concatenate([qpos.flat[2:], qvel.flat])
|
||||
|
||||
def reset_model(self):
|
||||
self.set_state(
|
||||
self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
|
||||
self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
|
||||
)
|
||||
return self._get_obs()
|