This commit is contained in:
Paul Corbalan 2023-08-21 17:09:08 +02:00
commit 5870182dad
10385 changed files with 4713964 additions and 0 deletions

View File

@ -0,0 +1,107 @@
# AI for Self Driving Car
# Importing the libraries
import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable
# Creating the architecture of the Neural Network
class Network(nn.Module):
def __init__(self, input_size, nb_action):
super(Network, self).__init__()
self.input_size = input_size
self.nb_action = nb_action
self.fc1 = nn.Linear(input_size, 30)
self.fc2 = nn.Linear(30, nb_action)
def forward(self, state):
x = F.relu(self.fc1(state))
q_values = self.fc2(x)
return q_values
# Implementing Experience Replay
class ReplayMemory(object):
def __init__(self, capacity):
self.capacity = capacity
self.memory = []
def push(self, event):
self.memory.append(event)
if len(self.memory) > self.capacity:
del self.memory[0]
def sample(self, batch_size):
samples = zip(*random.sample(self.memory, batch_size))
return map(lambda x: Variable(torch.cat(x, 0)), samples)
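# Note on sample(): random.sample draws batch_size transitions, then zip(*...)
# regroups them field by field, e.g. [(s1, ns1, a1, r1), (s2, ns2, a2, r2)]
# becomes [(s1, s2), (ns1, ns2), (a1, a2), (r1, r2)], and torch.cat joins each
# group into a single batched tensor along dimension 0.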
# Implementing Deep Q Learning
class Dqn():
def __init__(self, input_size, nb_action, gamma):
self.gamma = gamma
self.reward_window = []
self.model = Network(input_size, nb_action)
self.memory = ReplayMemory(100000)
self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001)
self.last_state = torch.Tensor(input_size).unsqueeze(0)
self.last_action = 0
self.last_reward = 0
def select_action(self, state):
probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100
action = probs.multinomial(num_samples=1)
return action.data[0,0]
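# Note on select_action(): scaling the Q-values by the temperature T = 100
# sharpens the softmax, so the highest-valued action is drawn with probability
# close to 1 while the other actions are still occasionally explored.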
def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
next_outputs = self.model(batch_next_state).detach().max(1)[0]
target = self.gamma*next_outputs + batch_reward
td_loss = F.smooth_l1_loss(outputs, target)
self.optimizer.zero_grad()
td_loss.backward()
self.optimizer.step()
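# Note on learn(): the target follows the Bellman equation,
# target = reward + gamma * max_a Q(next_state, a), and the Huber (smooth L1)
# loss on the temporal-difference error is minimized by gradient descent.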
def update(self, reward, new_signal):
new_state = torch.Tensor(new_signal).float().unsqueeze(0)
self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
action = self.select_action(new_state)
if len(self.memory.memory) > 100:
batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
self.learn(batch_state, batch_next_state, batch_reward, batch_action)
self.last_action = action
self.last_state = new_state
self.last_reward = reward
self.reward_window.append(reward)
if len(self.reward_window) > 1000:
del self.reward_window[0]
return action
def score(self):
return sum(self.reward_window)/(len(self.reward_window)+1.)
def save(self):
torch.save({'state_dict': self.model.state_dict(),
'optimizer' : self.optimizer.state_dict(),
}, 'last_brain.pth')
def load(self):
if os.path.isfile('last_brain.pth'):
print("=> loading checkpoint... ")
checkpoint = torch.load('last_brain.pth')
self.model.load_state_dict(checkpoint['state_dict'])
self.optimizer.load_state_dict(checkpoint['optimizer'])
print("done !")
else:
print("no checkpoint found...")

View File

@ -0,0 +1,111 @@
# AI for Self Driving Car
# Importing the libraries
import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable
# Creating the architecture of the Neural Network
class Network(nn.Module):
def __init__(self, input_size, nb_action):
super(Network, self).__init__()
self.input_size = input_size
self.nb_action = nb_action
self.fc1 = nn.Linear(input_size, 64)
self.fc2 = nn.Linear(64, 32)
self.fc3 = nn.Linear(32, 16)
self.fc4 = nn.Linear(16, nb_action)
def forward(self, state):
x = F.relu(self.fc1(state))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
q_values = self.fc4(x)
return q_values
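# Note: this variant deepens the Q-network (64 -> 32 -> 16 hidden units)
# compared with the single 30-unit hidden layer of the variant above.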
# Implementing Experience Replay
class ReplayMemory(object):
def __init__(self, capacity):
self.capacity = capacity
self.memory = []
def push(self, event):
self.memory.append(event)
if len(self.memory) > self.capacity:
del self.memory[0]
def sample(self, batch_size):
samples = zip(*random.sample(self.memory, batch_size))
return map(lambda x: Variable(torch.cat(x, 0)), samples)
# Implementing Deep Q Learning
class Dqn():
def __init__(self, input_size, nb_action, gamma):
self.gamma = gamma
self.reward_window = []
self.model = Network(input_size, nb_action)
self.memory = ReplayMemory(100000)
self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001)
self.last_state = torch.Tensor(input_size).unsqueeze(0)
self.last_action = 0
self.last_reward = 0
def select_action(self, state):
probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100
action = probs.multinomial(num_samples=1)
return action.data[0,0]
def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
next_outputs = self.model(batch_next_state).detach().max(1)[0]
target = self.gamma*next_outputs + batch_reward
td_loss = F.smooth_l1_loss(outputs, target)
self.optimizer.zero_grad()
td_loss.backward()
self.optimizer.step()
def update(self, reward, new_signal):
new_state = torch.Tensor(new_signal).float().unsqueeze(0)
self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
action = self.select_action(new_state)
if len(self.memory.memory) > 100:
batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
self.learn(batch_state, batch_next_state, batch_reward, batch_action)
self.last_action = action
self.last_state = new_state
self.last_reward = reward
self.reward_window.append(reward)
if len(self.reward_window) > 1000:
del self.reward_window[0]
return action
def score(self):
return sum(self.reward_window)/(len(self.reward_window)+1.)
def save(self):
torch.save({'state_dict': self.model.state_dict(),
'optimizer' : self.optimizer.state_dict(),
}, 'last_brain.pth')
def load(self):
if os.path.isfile('last_brain.pth'):
print("=> loading checkpoint... ")
checkpoint = torch.load('last_brain.pth')
self.model.load_state_dict(checkpoint['state_dict'])
self.optimizer.load_state_dict(checkpoint['optimizer'])
print("done !")
else:
print("no checkpoint found...")

View File

@ -0,0 +1,59 @@
#:kivy 1.0.9
# ref: https://kivy.org/docs/tutorials/pong.html
<Car>:
size: 20, 10
canvas:
PushMatrix
Rotate:
angle: self.angle
origin: self.center
Rectangle:
pos: self.pos
size: self.size
PopMatrix
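# PushMatrix/PopMatrix keep the Rotate instruction local to the car's
# rectangle, so the rest of the canvas is not rotated with it.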
<Ball1>:
size: 10,10
canvas:
Color:
rgba: 1,0,0,1
Ellipse:
pos: self.pos
size: self.size
<Ball2>:
size: 10,10
canvas:
Color:
rgba: 0,1,1,1
Ellipse:
pos: self.pos
size: self.size
<Ball3>:
size: 10,10
canvas:
Color:
rgba: 1,1,0,1
Ellipse:
pos: self.pos
size: self.size
<Game>:
car: game_car
ball1: game_ball1
ball2: game_ball2
ball3: game_ball3
Car:
id: game_car
center: self.parent.center
Ball1:
id: game_ball1
center: self.parent.center
Ball2:
id: game_ball2
center: self.parent.center
Ball3:
id: game_ball3
center: self.parent.center

View File

@ -0,0 +1,235 @@
# Self Driving Car
# Importing the libraries
import numpy as np
from random import random, randint
import matplotlib.pyplot as plt
import time
# Importing the Kivy packages
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.button import Button
from kivy.graphics import Color, Ellipse, Line
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.clock import Clock
# Importing the Dqn object from our AI in ai.py
from ai import Dqn
# Adding this line so that a right click does not put a red point on the map
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0
# Getting our AI, which we call "brain" and which contains the neural network that represents our Q-function
brain = Dqn(5,3,0.9)
action2rotation = [0,20,-20]
last_reward = 0
scores = []
# Initializing the map
first_update = True
def init():
global sand
global goal_x
global goal_y
global first_update
sand = np.zeros((longueur,largeur))
goal_x = 20
goal_y = largeur - 20
first_update = False
# Initializing the last distance
last_distance = 0
# Creating the car class
class Car(Widget):
angle = NumericProperty(0)
rotation = NumericProperty(0)
velocity_x = NumericProperty(0)
velocity_y = NumericProperty(0)
velocity = ReferenceListProperty(velocity_x, velocity_y)
sensor1_x = NumericProperty(0)
sensor1_y = NumericProperty(0)
sensor1 = ReferenceListProperty(sensor1_x, sensor1_y)
sensor2_x = NumericProperty(0)
sensor2_y = NumericProperty(0)
sensor2 = ReferenceListProperty(sensor2_x, sensor2_y)
sensor3_x = NumericProperty(0)
sensor3_y = NumericProperty(0)
sensor3 = ReferenceListProperty(sensor3_x, sensor3_y)
signal1 = NumericProperty(0)
signal2 = NumericProperty(0)
signal3 = NumericProperty(0)
def move(self, rotation):
self.pos = Vector(*self.velocity) + self.pos
self.rotation = rotation
self.angle = self.angle + self.rotation
self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos
self.sensor2 = Vector(30, 0).rotate((self.angle+30)%360) + self.pos
self.sensor3 = Vector(30, 0).rotate((self.angle-30)%360) + self.pos
self.signal1 = int(np.sum(sand[int(self.sensor1_x)-10:int(self.sensor1_x)+10, int(self.sensor1_y)-10:int(self.sensor1_y)+10]))/400.
self.signal2 = int(np.sum(sand[int(self.sensor2_x)-10:int(self.sensor2_x)+10, int(self.sensor2_y)-10:int(self.sensor2_y)+10]))/400.
self.signal3 = int(np.sum(sand[int(self.sensor3_x)-10:int(self.sensor3_x)+10, int(self.sensor3_y)-10:int(self.sensor3_y)+10]))/400.
if self.sensor1_x>longueur-10 or self.sensor1_x<10 or self.sensor1_y>largeur-10 or self.sensor1_y<10:
self.signal1 = 1.
if self.sensor2_x>longueur-10 or self.sensor2_x<10 or self.sensor2_y>largeur-10 or self.sensor2_y<10:
self.signal2 = 1.
if self.sensor3_x>longueur-10 or self.sensor3_x<10 or self.sensor3_y>largeur-10 or self.sensor3_y<10:
self.signal3 = 1.
class Ball1(Widget):
pass
class Ball2(Widget):
pass
class Ball3(Widget):
pass
# Creating the game class
class Game(Widget):
car = ObjectProperty(None)
ball1 = ObjectProperty(None)
ball2 = ObjectProperty(None)
ball3 = ObjectProperty(None)
def serve_car(self):
self.car.center = self.center
self.car.velocity = Vector(6, 0)
def update(self, dt):
global brain
global last_reward
global scores
global last_distance
global goal_x
global goal_y
global longueur
global largeur
longueur = self.width
largeur = self.height
if first_update:
init()
xx = goal_x - self.car.x
yy = goal_y - self.car.y
orientation = Vector(*self.car.velocity).angle((xx,yy))/180.
last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation, -orientation]
action = brain.update(last_reward, last_signal)
scores.append(brain.score())
rotation = action2rotation[action]
self.car.move(rotation)
distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)
self.ball1.pos = self.car.sensor1
self.ball2.pos = self.car.sensor2
self.ball3.pos = self.car.sensor3
if sand[int(self.car.x),int(self.car.y)] > 0:
self.car.velocity = Vector(1, 0).rotate(self.car.angle)
last_reward = -1
else: # otherwise
self.car.velocity = Vector(6, 0).rotate(self.car.angle)
last_reward = -0.2
if distance < last_distance:
last_reward = 0.1
if self.car.x < 10:
self.car.x = 10
last_reward = -1
if self.car.x > self.width - 10:
self.car.x = self.width - 10
last_reward = -1
if self.car.y < 10:
self.car.y = 10
last_reward = -1
if self.car.y > self.height - 10:
self.car.y = self.height - 10
last_reward = -1
if distance < 100:
goal_x = self.width-goal_x
goal_y = self.height-goal_y
last_distance = distance
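# Summary of the reward scheme above: -1 on sand or against a wall, a -0.2
# living penalty otherwise, and +0.1 whenever the car gets closer to the goal;
# once within 100 pixels of the goal, the goal is flipped to the opposite
# corner of the map.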
# Adding the painting tools
class MyPaintWidget(Widget):
def on_touch_down(self, touch):
global length, n_points, last_x, last_y
with self.canvas:
Color(0.8,0.7,0)
d = 10.
touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10)
last_x = int(touch.x)
last_y = int(touch.y)
n_points = 0
length = 0
sand[int(touch.x),int(touch.y)] = 1
def on_touch_move(self, touch):
global length, n_points, last_x, last_y
if touch.button == 'left':
touch.ud['line'].points += [touch.x, touch.y]
x = int(touch.x)
y = int(touch.y)
length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2))
n_points += 1.
density = n_points/(length)
touch.ud['line'].width = int(20 * density + 1)
sand[int(touch.x) - 10 : int(touch.x) + 10, int(touch.y) - 10 : int(touch.y) + 10] = 1
last_x = x
last_y = y
# Adding the API Buttons (clear, save and load)
class CarApp(App):
def build(self):
parent = Game()
parent.serve_car()
Clock.schedule_interval(parent.update, 1.0/60.0)
self.painter = MyPaintWidget()
clearbtn = Button(text = 'clear')
savebtn = Button(text = 'save', pos = (parent.width, 0))
loadbtn = Button(text = 'load', pos = (2 * parent.width, 0))
clearbtn.bind(on_release = self.clear_canvas)
savebtn.bind(on_release = self.save)
loadbtn.bind(on_release = self.load)
parent.add_widget(self.painter)
parent.add_widget(clearbtn)
parent.add_widget(savebtn)
parent.add_widget(loadbtn)
return parent
def clear_canvas(self, obj):
global sand
self.painter.canvas.clear()
sand = np.zeros((longueur,largeur))
def save(self, obj):
print("saving brain...")
brain.save()
plt.plot(scores)
plt.show()
def load(self, obj):
print("loading last saved brain...")
brain.load()
# Running the whole thing
if __name__ == '__main__':
CarApp().run()

View File

@ -0,0 +1,249 @@
# Self Driving Car
# Importing the libraries
import numpy as np
from random import random, randint
import matplotlib.pyplot as plt
import time
# Importing the Kivy packages
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.button import Button
from kivy.graphics import Color, Ellipse, Line
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.clock import Clock
# Importing the Dqn object from our AI in ai2.py
from ai2 import Dqn
# Implementing a timer
from timeit import default_timer
starting_time = default_timer()
duration_time = default_timer() - starting_time
# Adding this line so that a right click does not put a red point on the map
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0
# Getting our AI, which we call "brain" and which contains the neural network that represents our Q-function
brain = Dqn(6,3,0.9)
action2rotation = [0,20,-20]
last_reward = 0
scores = []
# Initializing the map
first_update = True
def init():
global sand
global goal_x
global goal_y
global first_update
sand = np.zeros((longueur,largeur))
goal_x = 20
goal_y = largeur - 20
first_update = False
# Initializing the last distance
last_distance = 0
# Creating the car class
class Car(Widget):
angle = NumericProperty(0)
rotation = NumericProperty(0)
velocity_x = NumericProperty(0)
velocity_y = NumericProperty(0)
velocity = ReferenceListProperty(velocity_x, velocity_y)
sensor1_x = NumericProperty(0)
sensor1_y = NumericProperty(0)
sensor1 = ReferenceListProperty(sensor1_x, sensor1_y)
sensor2_x = NumericProperty(0)
sensor2_y = NumericProperty(0)
sensor2 = ReferenceListProperty(sensor2_x, sensor2_y)
sensor3_x = NumericProperty(0)
sensor3_y = NumericProperty(0)
sensor3 = ReferenceListProperty(sensor3_x, sensor3_y)
signal1 = NumericProperty(0)
signal2 = NumericProperty(0)
signal3 = NumericProperty(0)
def move(self, rotation):
self.pos = Vector(*self.velocity) + self.pos
self.rotation = rotation
self.angle = self.angle + self.rotation
self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos
self.sensor2 = Vector(30, 0).rotate((self.angle+30)%360) + self.pos
self.sensor3 = Vector(30, 0).rotate((self.angle-30)%360) + self.pos
self.signal1 = int(np.sum(sand[int(self.sensor1_x)-10:int(self.sensor1_x)+10, int(self.sensor1_y)-10:int(self.sensor1_y)+10]))/400.
self.signal2 = int(np.sum(sand[int(self.sensor2_x)-10:int(self.sensor2_x)+10, int(self.sensor2_y)-10:int(self.sensor2_y)+10]))/400.
self.signal3 = int(np.sum(sand[int(self.sensor3_x)-10:int(self.sensor3_x)+10, int(self.sensor3_y)-10:int(self.sensor3_y)+10]))/400.
if self.sensor1_x>longueur-10 or self.sensor1_x<10 or self.sensor1_y>largeur-10 or self.sensor1_y<10:
self.signal1 = 1.
if self.sensor2_x>longueur-10 or self.sensor2_x<10 or self.sensor2_y>largeur-10 or self.sensor2_y<10:
self.signal2 = 1.
if self.sensor3_x>longueur-10 or self.sensor3_x<10 or self.sensor3_y>largeur-10 or self.sensor3_y<10:
self.signal3 = 1.
class Ball1(Widget):
pass
class Ball2(Widget):
pass
class Ball3(Widget):
pass
# Creating the game class
class Game(Widget):
car = ObjectProperty(None)
ball1 = ObjectProperty(None)
ball2 = ObjectProperty(None)
ball3 = ObjectProperty(None)
def serve_car(self):
self.car.center = self.center
self.car.velocity = Vector(6, 0)
def update(self, dt):
global brain
global last_reward
global scores
global last_distance
global goal_x
global goal_y
global longueur
global largeur
global starting_time
global duration_time
longueur = self.width
largeur = self.height
duration_time = default_timer() - starting_time # refreshing the elapsed time since the last goal on every frame, so the input signal and the time penalty below use an up-to-date value
if first_update:
init()
xx = goal_x - self.car.x
yy = goal_y - self.car.y
orientation = Vector(*self.car.velocity).angle((xx,yy))/180.
last_signal = [self.car.signal1, self.car.signal2, self.car.signal3,
orientation, -orientation, duration_time]
action = brain.update(last_reward, last_signal)
scores.append(brain.score())
rotation = action2rotation[action]
self.car.move(rotation)
distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)
self.ball1.pos = self.car.sensor1
self.ball2.pos = self.car.sensor2
self.ball3.pos = self.car.sensor3
if sand[int(self.car.x),int(self.car.y)] > 0:
self.car.velocity = Vector(1, 0).rotate(self.car.angle)
last_reward = -1.5
else: # otherwise
self.car.velocity = Vector(6, 0).rotate(self.car.angle)
last_reward = -0.1
if distance < last_distance:
last_reward = 0.1
if self.car.x < 10:
self.car.x = 10
last_reward = -1
if self.car.x > self.width - 10:
self.car.x = self.width - 10
last_reward = -1
if self.car.y < 10:
self.car.y = 10
last_reward = -1
if self.car.y > self.height - 10:
self.car.y = self.height - 10
last_reward = -1
if distance < 100:
goal_x = self.width-goal_x
goal_y = self.height-goal_y
last_reward = 2
starting_time = default_timer()
duration_time = default_timer() - starting_time
if duration_time > 10:
last_reward = last_reward - 0.2
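# Note on the timer: duration_time measures the time elapsed since the last
# goal was reached; taking more than 10 seconds costs an extra -0.2 reward,
# while reaching the goal grants +2 and resets the timer.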
last_distance = distance
# Adding the painting tools
class MyPaintWidget(Widget):
def on_touch_down(self, touch):
global length, n_points, last_x, last_y
with self.canvas:
Color(0.8,0.7,0)
d = 10.
touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10)
last_x = int(touch.x)
last_y = int(touch.y)
n_points = 0
length = 0
sand[int(touch.x),int(touch.y)] = 1
def on_touch_move(self, touch):
global length, n_points, last_x, last_y
if touch.button == 'left':
touch.ud['line'].points += [touch.x, touch.y]
x = int(touch.x)
y = int(touch.y)
length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2))
n_points += 1.
density = n_points/(length)
touch.ud['line'].width = int(20 * density + 1)
sand[int(touch.x) - 10 : int(touch.x) + 10, int(touch.y) - 10 : int(touch.y) + 10] = 1
last_x = x
last_y = y
# Adding the API Buttons (clear, save and load)
class CarApp(App):
def build(self):
parent = Game()
parent.serve_car()
Clock.schedule_interval(parent.update, 1.0/60.0)
self.painter = MyPaintWidget()
clearbtn = Button(text = 'clear')
savebtn = Button(text = 'save', pos = (parent.width, 0))
loadbtn = Button(text = 'load', pos = (2 * parent.width, 0))
clearbtn.bind(on_release = self.clear_canvas)
savebtn.bind(on_release = self.save)
loadbtn.bind(on_release = self.load)
parent.add_widget(self.painter)
parent.add_widget(clearbtn)
parent.add_widget(savebtn)
parent.add_widget(loadbtn)
return parent
def clear_canvas(self, obj):
global sand
self.painter.canvas.clear()
sand = np.zeros((longueur,largeur))
def save(self, obj):
print("saving brain...")
brain.save()
plt.plot(scores)
plt.show()
def load(self, obj):
print("loading last saved brain...")
brain.load()
# Running the whole thing
if __name__ == '__main__':
CarApp().run()

View File

@ -0,0 +1,236 @@
# Self Driving Car
# Importing the libraries
import numpy as np
from random import random, randint
import matplotlib.pyplot as plt
import time
# Importing the Kivy packages
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.button import Button
from kivy.graphics import Color, Ellipse, Line
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.clock import Clock
# Importing the Dqn object from our AI in ai.py
from ai import Dqn
# Adding this line so that a right click does not put a red point on the map
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0 # the total number of points in the last drawing
length = 0 # the length of the last drawing
# Getting our AI, which we call "brain" and which contains the neural network that represents our Q-function
brain = Dqn(5,3,0.9) # 5 sensors, 3 actions, gamma = 0.9
action2rotation = [0,20,-20] # action = 0 => no rotation, action = 1 => rotate 20 degrees, action = 2 => rotate -20 degrees
last_reward = 0 # initializing the last reward
scores = [] # initializing the mean score curve (sliding window of the rewards) with respect to time
# Initializing the map
first_update = True # using this trick to initialize the map only once
def init():
global sand # sand is an array that has as many cells as our graphic interface has pixels. Each cell has a one if there is sand, 0 otherwise.
global goal_x # x-coordinate of the goal (where the car has to go, that is the airport or the downtown)
global goal_y # y-coordinate of the goal (where the car has to go, that is the airport or the downtown)
global first_update # needed so the assignment below updates the module-level flag instead of creating a local variable
sand = np.zeros((longueur,largeur)) # initializing the sand array with only zeros
goal_x = 20 # the goal to reach is at the upper left of the map (the x-coordinate is 20 and not 0 because the car gets bad reward if it touches the wall)
goal_y = largeur - 20 # the goal to reach is at the upper left of the map (y-coordinate)
first_update = False # trick to initialize the map only once
# Initializing the last distance
last_distance = 0
# Creating the car class (to understand "NumericProperty" and "ReferenceListProperty", see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
class Car(Widget):
angle = NumericProperty(0) # initializing the angle of the car (angle between the x-axis of the map and the axis of the car)
rotation = NumericProperty(0) # initializing the last rotation of the car (after playing the action, the car does a rotation of 0, 20 or -20 degrees)
velocity_x = NumericProperty(0) # initializing the x-coordinate of the velocity vector
velocity_y = NumericProperty(0) # initializing the y-coordinate of the velocity vector
velocity = ReferenceListProperty(velocity_x, velocity_y) # velocity vector
sensor1_x = NumericProperty(0) # initializing the x-coordinate of the first sensor (the one that looks forward)
sensor1_y = NumericProperty(0) # initializing the y-coordinate of the first sensor (the one that looks forward)
sensor1 = ReferenceListProperty(sensor1_x, sensor1_y) # first sensor vector
sensor2_x = NumericProperty(0) # initializing the x-coordinate of the second sensor (the one that looks 30 degrees to the left)
sensor2_y = NumericProperty(0) # initializing the y-coordinate of the second sensor (the one that looks 30 degrees to the left)
sensor2 = ReferenceListProperty(sensor2_x, sensor2_y) # second sensor vector
sensor3_x = NumericProperty(0) # initializing the x-coordinate of the third sensor (the one that looks 30 degrees to the right)
sensor3_y = NumericProperty(0) # initializing the y-coordinate of the third sensor (the one that looks 30 degrees to the right)
sensor3 = ReferenceListProperty(sensor3_x, sensor3_y) # third sensor vector
signal1 = NumericProperty(0) # initializing the signal received by sensor 1
signal2 = NumericProperty(0) # initializing the signal received by sensor 2
signal3 = NumericProperty(0) # initializing the signal received by sensor 3
def move(self, rotation):
self.pos = Vector(*self.velocity) + self.pos # updating the position of the car according to its last position and velocity
self.rotation = rotation # getting the rotation of the car
self.angle = self.angle + self.rotation # updating the angle
self.sensor1 = Vector(30, 0).rotate(self.angle) + self.pos # updating the position of sensor 1
self.sensor2 = Vector(30, 0).rotate((self.angle+30)%360) + self.pos # updating the position of sensor 2
self.sensor3 = Vector(30, 0).rotate((self.angle-30)%360) + self.pos # updating the position of sensor 3
self.signal1 = int(np.sum(sand[int(self.sensor1_x)-10:int(self.sensor1_x)+10, int(self.sensor1_y)-10:int(self.sensor1_y)+10]))/400. # getting the signal received by sensor 1 (density of sand around sensor 1)
self.signal2 = int(np.sum(sand[int(self.sensor2_x)-10:int(self.sensor2_x)+10, int(self.sensor2_y)-10:int(self.sensor2_y)+10]))/400. # getting the signal received by sensor 2 (density of sand around sensor 2)
self.signal3 = int(np.sum(sand[int(self.sensor3_x)-10:int(self.sensor3_x)+10, int(self.sensor3_y)-10:int(self.sensor3_y)+10]))/400. # getting the signal received by sensor 3 (density of sand around sensor 3)
if self.sensor1_x > longueur-10 or self.sensor1_x<10 or self.sensor1_y>largeur-10 or self.sensor1_y<10: # if sensor 1 is out of the map (the car is facing one edge of the map)
self.signal1 = 1. # sensor 1 detects full sand
if self.sensor2_x > longueur-10 or self.sensor2_x<10 or self.sensor2_y>largeur-10 or self.sensor2_y<10: # if sensor 2 is out of the map (the car is facing one edge of the map)
self.signal2 = 1. # sensor 2 detects full sand
if self.sensor3_x > longueur-10 or self.sensor3_x<10 or self.sensor3_y>largeur-10 or self.sensor3_y<10: # if sensor 3 is out of the map (the car is facing one edge of the map)
self.signal3 = 1. # sensor 3 detects full sand
class Ball1(Widget): # sensor 1 (see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
pass
class Ball2(Widget): # sensor 2 (see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
pass
class Ball3(Widget): # sensor 3 (see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
pass
# Creating the game class (to understand "ObjectProperty", see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
class Game(Widget):
car = ObjectProperty(None) # getting the car object from our kivy file
ball1 = ObjectProperty(None) # getting the sensor 1 object from our kivy file
ball2 = ObjectProperty(None) # getting the sensor 2 object from our kivy file
ball3 = ObjectProperty(None) # getting the sensor 3 object from our kivy file
def serve_car(self): # starting the car when we launch the application
self.car.center = self.center # the car will start at the center of the map
self.car.velocity = Vector(6, 0) # the car will start to go horizontally to the right with a speed of 6
def update(self, dt): # the big update function that updates everything that needs to be updated at each discrete time t when reaching a new state (getting new signals from the sensors)
global brain # specifying the global variables (the brain of the car, that is our AI)
global last_reward # specifying the global variables (the last reward)
global scores # specifying the global variables (the means of the rewards)
global last_distance # specifying the global variables (the last distance from the car to the goal)
global goal_x # specifying the global variables (x-coordinate of the goal)
global goal_y # specifying the global variables (y-coordinate of the goal)
global longueur # specifying the global variables (width of the map)
global largeur # specifying the global variables (height of the map)
longueur = self.width # width of the map (horizontal edge)
largeur = self.height # height of the map (vertical edge)
if first_update: # trick to initialize the map only once
init()
xx = goal_x - self.car.x # difference of x-coordinates between the goal and the car
yy = goal_y - self.car.y # difference of y-coordinates between the goal and the car
orientation = Vector(*self.car.velocity).angle((xx,yy))/180. # direction of the car with respect to the goal (if the car is heading perfectly towards the goal, then orientation = 0)
last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation, -orientation] # our input state vector, composed of the three signals received by the three sensors, plus the orientation and -orientation
action = brain.update(last_reward, last_signal) # playing the action from our ai (the object brain of the dqn class)
scores.append(brain.score()) # appending the score (mean of the rewards in the sliding reward window)
rotation = action2rotation[action] # converting the action played (0, 1 or 2) into the rotation angle (0°, 20° or -20°)
self.car.move(rotation) # moving the car according to this last rotation angle
distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2) # getting the new distance between the car and the goal right after the car moved
self.ball1.pos = self.car.sensor1 # updating the position of the first sensor (ball1) right after the car moved
self.ball2.pos = self.car.sensor2 # updating the position of the second sensor (ball2) right after the car moved
self.ball3.pos = self.car.sensor3 # updating the position of the third sensor (ball3) right after the car moved
if sand[int(self.car.x),int(self.car.y)] > 0: # if the car is on the sand
self.car.velocity = Vector(1, 0).rotate(self.car.angle) # it is slowed down (speed = 1)
last_reward = -1 # and reward = -1
else: # otherwise
self.car.velocity = Vector(6, 0).rotate(self.car.angle) # it goes to a normal speed (speed = 6)
last_reward = -0.2 # and it gets bad reward (-0.2)
if distance < last_distance: # however, if the car is getting closer to the goal
last_reward = 0.1 # it still gets a slightly positive reward of 0.1
if self.car.x < 10: # if the car is in the left edge of the frame
self.car.x = 10 # it is not slowed down
last_reward = -1 # but it gets bad reward -1
if self.car.x > self.width-10: # if the car is in the right edge of the frame
self.car.x = self.width-10 # it is not slowed down
last_reward = -1 # but it gets bad reward -1
if self.car.y < 10: # if the car is in the bottom edge of the frame
self.car.y = 10 # it is not slowed down
last_reward = -1 # but it gets bad reward -1
if self.car.y > self.height-10: # if the car is in the upper edge of the frame
self.car.y = self.height-10 # it is not slowed down
last_reward = -1 # but it gets bad reward -1
if distance < 100: # when the car reaches its goal
goal_x = self.width - goal_x # the goal becomes the bottom right corner of the map (the downtown), and vice versa (updating of the x-coordinate of the goal)
goal_y = self.height - goal_y # the goal becomes the bottom right corner of the map (the downtown), and vice versa (updating of the y-coordinate of the goal)
# Updating the last distance from the car to the goal
last_distance = distance
# Painting for graphic interface (see kivy tutorials: https://kivy.org/docs/tutorials/firstwidget.html)
class MyPaintWidget(Widget):
def on_touch_down(self, touch): # putting some sand when we do a left click
global length,n_points,last_x,last_y
with self.canvas:
Color(0.8,0.7,0)
d=10.
touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10)
last_x = int(touch.x)
last_y = int(touch.y)
n_points = 0
length = 0
sand[int(touch.x),int(touch.y)] = 1
def on_touch_move(self, touch): # putting some sand when we move the mouse while pressing left
global length,n_points,last_x,last_y
if touch.button=='left':
touch.ud['line'].points += [touch.x, touch.y]
x = int(touch.x)
y = int(touch.y)
length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2))
n_points += 1.
density = n_points/(length)
touch.ud['line'].width = int(20*density + 1)
sand[int(touch.x) - 10 : int(touch.x) + 10, int(touch.y) - 10 : int(touch.y) + 10] = 1
last_x = x
last_y = y
# API and switches interface (see kivy tutorials: https://kivy.org/docs/tutorials/pong.html)
class CarApp(App):
def build(self): # building the app
parent = Game()
parent.serve_car()
Clock.schedule_interval(parent.update, 1.0 / 60.0)
self.painter = MyPaintWidget()
clearbtn = Button(text='clear')
savebtn = Button(text='save',pos=(parent.width,0))
loadbtn = Button(text='load',pos=(2*parent.width,0))
clearbtn.bind(on_release=self.clear_canvas)
savebtn.bind(on_release=self.save)
loadbtn.bind(on_release=self.load)
parent.add_widget(self.painter)
parent.add_widget(clearbtn)
parent.add_widget(savebtn)
parent.add_widget(loadbtn)
return parent
def clear_canvas(self, obj): # clear button
global sand
self.painter.canvas.clear()
sand = np.zeros((longueur,largeur))
def save(self, obj): # save button
print("saving brain...")
brain.save()
plt.plot(scores)
plt.show()
def load(self, obj): # load button
print("loading last saved brain...")
brain.load()
# Running the app
if __name__ == '__main__':
CarApp().run()

View File

@ -0,0 +1,155 @@
# AI for Doom
# Importing the libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
# Importing the packages for OpenAI and Doom
import gym
from gym import wrappers
import vizdoomgym
# Importing the other Python files
import experience_replay, image_preprocessing
# Part 1 - Building the AI
# Making the brain
class CNN(nn.Module):
def __init__(self, number_actions):
super(CNN, self).__init__()
self.convolution1 = nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size = 5)
self.convolution2 = nn.Conv2d(in_channels = 32, out_channels = 32, kernel_size = 3)
self.convolution3 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 2)
self.fc1 = nn.Linear(in_features = self.count_neurons((1, 80, 80)), out_features = 40)
self.fc2 = nn.Linear(in_features = 40, out_features = number_actions)
def count_neurons(self, image_dim):
x = Variable(torch.rand(1, *image_dim))
x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2))
x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2))
x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2))
return x.data.view(1, -1).size(1)
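# Note on count_neurons(): a dummy image of shape (1, 80, 80) is pushed once
# through the convolutional layers, and the number of elements in the
# flattened output gives the in_features required by the first fully
# connected layer.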
def forward(self, x):
x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2))
x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2))
x = F.relu(F.max_pool2d(self.convolution3(x), 3, 2))
x = x.view(x.size(0), -1)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
# Making the body
class SoftmaxBody(nn.Module):
def __init__(self, T):
super(SoftmaxBody, self).__init__()
self.T = T
def forward(self, outputs):
probs = F.softmax(outputs * self.T)
actions = probs.multinomial(num_samples=1)
return actions
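# Note: self.T acts as an inverse temperature on the Q-values; T > 1 sharpens
# the distribution towards the greedy action, while T < 1 flattens it towards
# uniform exploration.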
# Making the AI
class AI:
def __init__(self, brain, body):
self.brain = brain
self.body = body
def __call__(self, inputs):
input = Variable(torch.from_numpy(np.array(inputs, dtype = np.float32)))
output = self.brain(input)
actions = self.body(output)
return actions.data.numpy()
# Part 2 - Training the AI with Deep Convolutional Q-Learning
# Getting the Doom environment
doom_env = image_preprocessing.PreprocessImage(gym.make("VizdoomCorridor-v0"), width=80, height=80, grayscale=True)
doom_env = wrappers.Monitor(doom_env, "videos", force = True)
number_actions = doom_env.action_space.n
# Building an AI
cnn = CNN(number_actions)
softmax_body = SoftmaxBody(T = 1.0)
ai = AI(brain = cnn, body = softmax_body)
# Setting up Experience Replay
n_steps = experience_replay.NStepProgress(env = doom_env, ai = ai, n_step = 10)
memory = experience_replay.ReplayMemory(n_steps = n_steps, capacity = 10000)
# Implementing Eligibility Trace
def eligibility_trace(batch):
gamma = 0.99
inputs = []
targets = []
for series in batch:
input = Variable(torch.from_numpy(np.array([series[0].state, series[-1].state], dtype = np.float32)))
output = cnn(input)
cumul_reward = 0.0 if series[-1].done else output[1].data.max()
for step in reversed(series[:-1]):
cumul_reward = step.reward + gamma * cumul_reward
state = series[0].state
target = output[0].data
target[series[0].action] = cumul_reward
inputs.append(state)
targets.append(target)
return torch.from_numpy(np.array(inputs, dtype = np.float32)), torch.stack(targets)
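# Note on eligibility_trace(): for each n-step series this builds the n-step
# return R = r_0 + gamma*r_1 + ... + gamma^(n-1)*r_(n-1), bootstrapped with
# gamma^n * max_a Q(s_n, a) when the series does not end the episode, and uses
# it as the target for the Q-value of the first state and action.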
# Making the moving average on 100 steps
class MA:
def __init__(self, size):
self.list_of_rewards = []
self.size = size
def add(self, rewards):
if isinstance(rewards, list):
self.list_of_rewards += rewards
else:
self.list_of_rewards.append(rewards)
while len(self.list_of_rewards) > self.size:
del self.list_of_rewards[0]
def average(self):
return np.mean(self.list_of_rewards)
ma = MA(100)
# Training the AI
loss = nn.MSELoss()
optimizer = optim.Adam(cnn.parameters(), lr = 0.001)
nb_epochs = 100
for epoch in range(1, nb_epochs + 1):
memory.run_steps(200)
for batch in memory.sample_batch(128):
inputs, targets = eligibility_trace(batch)
inputs, targets = Variable(inputs), Variable(targets)
predictions = cnn(inputs)
loss_error = loss(predictions, targets)
optimizer.zero_grad()
loss_error.backward()
optimizer.step()
rewards_steps = n_steps.rewards_steps()
ma.add(rewards_steps)
avg_reward = ma.average()
print("Epoch: %s, Average Reward: %s" % (str(epoch), str(avg_reward)))
if avg_reward >= 1500:
print("Congratulations, your AI wins")
break
# Closing the Doom environment
doom_env.close()

View File

@ -0,0 +1,75 @@
# Experience Replay
# Importing the libraries
import numpy as np
from collections import namedtuple, deque
# Defining one Step
Step = namedtuple('Step', ['state', 'action', 'reward', 'done'])
# Making the AI progress on several (n_step) steps
class NStepProgress:
def __init__(self, env, ai, n_step):
self.ai = ai
self.rewards = []
self.env = env
self.n_step = n_step
def __iter__(self):
state = self.env.reset()
history = deque()
reward = 0.0
while True:
self.env.render()
action = self.ai(np.array([state]))[0][0]
next_state, r, is_done, _ = self.env.step(action)
reward += r
history.append(Step(state = state, action = action, reward = r, done = is_done))
while len(history) > self.n_step + 1:
history.popleft()
if len(history) == self.n_step + 1:
yield tuple(history)
state = next_state
if is_done:
if len(history) > self.n_step + 1:
history.popleft()
while len(history) >= 1:
yield tuple(history)
history.popleft()
self.rewards.append(reward)
reward = 0.0
state = self.env.reset()
history.clear()
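# Note on __iter__(): the deque keeps a sliding window of n_step + 1
# consecutive steps and yields each full window as one n-step transition; at
# the end of an episode the remaining shorter windows are flushed before the
# environment is reset.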
def rewards_steps(self):
rewards_steps = self.rewards
self.rewards = []
return rewards_steps
# Implementing Experience Replay
class ReplayMemory:
def __init__(self, n_steps, capacity = 10000):
self.capacity = capacity
self.n_steps = n_steps
self.n_steps_iter = iter(n_steps)
self.buffer = deque()
def sample_batch(self, batch_size): # creates an iterator that returns random batches
ofs = 0
vals = list(self.buffer)
np.random.shuffle(vals)
while (ofs+1)*batch_size <= len(self.buffer):
yield vals[ofs*batch_size:(ofs+1)*batch_size]
ofs += 1
def run_steps(self, samples):
while samples > 0:
entry = next(self.n_steps_iter) # one n-step transition (here n_step = 10 consecutive steps)
self.buffer.append(entry) # appending it to the buffer (run_steps(200) adds 200 such entries)
samples -= 1
while len(self.buffer) > self.capacity: # we accumulate no more than the capacity (10000)
self.buffer.popleft()

View File

@ -0,0 +1,28 @@
# Image Preprocessing
# Importing the libraries
import numpy as np
from scipy.misc import imresize
from gym.core import ObservationWrapper
from gym.spaces.box import Box
# Preprocessing the Images
class PreprocessImage(ObservationWrapper):
def __init__(self, env, height = 64, width = 64, grayscale = True, crop = lambda img: img):
super(PreprocessImage, self).__init__(env)
self.img_size = (height, width)
self.grayscale = grayscale
self.crop = crop
n_colors = 1 if self.grayscale else 3
self.observation_space = Box(0.0, 1.0, [n_colors, height, width])
def _observation(self, img):
img = self.crop(img)
img = imresize(img, self.img_size)
if self.grayscale:
img = img.mean(-1, keepdims = True)
img = np.transpose(img, (2, 0, 1))
img = img.astype('float32') / 255.
return img

View File

@ -0,0 +1,67 @@
# Improvement of the Gym environment with universe
import cv2
import gym
import numpy as np
from gym.spaces.box import Box
from gym import wrappers
# Taken from https://github.com/openai/universe-starter-agent
def create_atari_env(env_id, video=False):
env = gym.make(env_id)
if video:
env = wrappers.Monitor(env, 'test', force=True)
env = MyAtariRescale42x42(env)
env = MyNormalizedEnv(env)
return env
def _process_frame42(frame):
frame = frame[34:34 + 160, :160]
# Resize by half, then down to 42x42 (essentially mipmapping). If
# we resize directly we lose pixels that, when mapped to 42x42,
# aren't close enough to the pixel boundary.
frame = cv2.resize(frame, (80, 80))
frame = cv2.resize(frame, (42, 42))
frame = frame.mean(2)
frame = frame.astype(np.float32)
frame *= (1.0 / 255.0)
#frame = np.reshape(frame, [1, 42, 42])
return frame
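# Note on _process_frame42(): the frame is cropped to the 160x160 playing
# area, downscaled in two steps to 42x42, converted to grayscale by averaging
# the color channels, and rescaled to the [0, 1] range.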
class MyAtariRescale42x42(gym.ObservationWrapper):
def __init__(self, env=None):
super(MyAtariRescale42x42, self).__init__(env)
self.observation_space = Box(0.0, 1.0, [1, 42, 42])
def _observation(self, observation):
return _process_frame42(observation)
class MyNormalizedEnv(gym.ObservationWrapper):
def __init__(self, env=None):
super(MyNormalizedEnv, self).__init__(env)
self.state_mean = 0
self.state_std = 0
self.alpha = 0.9999
self.num_steps = 0
def _observation(self, observation):
self.num_steps += 1
self.state_mean = self.state_mean * self.alpha + \
observation.mean() * (1 - self.alpha)
self.state_std = self.state_std * self.alpha + \
observation.std() * (1 - self.alpha)
unbiased_mean = self.state_mean / (1 - pow(self.alpha, self.num_steps))
unbiased_std = self.state_std / (1 - pow(self.alpha, self.num_steps))
ret = (observation - unbiased_mean) / (unbiased_std + 1e-8)
return np.expand_dims(ret, axis=0)
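# Note on _observation(): the running mean and std are exponential moving
# averages with decay alpha; dividing by (1 - alpha^num_steps) removes their
# initialization bias, the same correction the Adam optimizer applies.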

View File

@ -0,0 +1,43 @@
# Main code
from __future__ import print_function
import os
import torch
import torch.multiprocessing as mp
from envs import create_atari_env
from model import ActorCritic
from train import train
from test import test
import my_optim
# Gathering all the parameters (that we can modify to explore)
class Params():
def __init__(self):
self.lr = 0.0001
self.gamma = 0.99
self.tau = 1.
self.seed = 1
self.num_processes = 16
self.num_steps = 20
self.max_episode_length = 10000
self.env_name = 'Breakout-v0'
# Main run
os.environ['OMP_NUM_THREADS'] = '1'
params = Params()
torch.manual_seed(params.seed)
env = create_atari_env(params.env_name)
shared_model = ActorCritic(env.observation_space.shape[0], env.action_space)
shared_model.share_memory()
optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=params.lr)
optimizer.share_memory()
processes = []
p = mp.Process(target=test, args=(params.num_processes, params, shared_model))
p.start()
processes.append(p)
for rank in range(0, params.num_processes):
p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
p.start()
processes.append(p)
for p in processes:
p.join()

View File

@ -0,0 +1,65 @@
# AI for Breakout
# Importing the libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# Initializing and setting the variance of a tensor of weights
def normalized_columns_initializer(weights, std=1.0):
out = torch.randn(weights.size())
out *= std / torch.sqrt(out.pow(2).sum(1, True))
return out
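# Note: each row of the random tensor is rescaled so that its squared entries
# sum to std^2, which fixes the scale of the initialized weights.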
# Initializing the weights of the neural network in an optimal way for the learning
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
weight_shape = list(m.weight.data.size())
fan_in = np.prod(weight_shape[1:4])
fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]
w_bound = np.sqrt(6. / (fan_in + fan_out))
m.weight.data.uniform_(-w_bound, w_bound)
m.bias.data.fill_(0)
elif classname.find('Linear') != -1:
weight_shape = list(m.weight.data.size())
fan_in = weight_shape[1]
fan_out = weight_shape[0]
w_bound = np.sqrt(6. / (fan_in + fan_out))
m.weight.data.uniform_(-w_bound, w_bound)
m.bias.data.fill_(0)
# Making the A3C brain
class ActorCritic(torch.nn.Module):
def __init__(self, num_inputs, action_space):
super(ActorCritic, self).__init__()
self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)
self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
self.lstm = nn.LSTMCell(32 * 3 * 3, 256)
num_outputs = action_space.n
self.critic_linear = nn.Linear(256, 1)
self.actor_linear = nn.Linear(256, num_outputs)
self.apply(weights_init)
self.actor_linear.weight.data = normalized_columns_initializer(self.actor_linear.weight.data, 0.01)
self.actor_linear.bias.data.fill_(0)
self.critic_linear.weight.data = normalized_columns_initializer(self.critic_linear.weight.data, 1.0)
self.critic_linear.bias.data.fill_(0)
self.lstm.bias_ih.data.fill_(0)
self.lstm.bias_hh.data.fill_(0)
self.train()
def forward(self, inputs):
inputs, (hx, cx) = inputs
x = F.elu(self.conv1(inputs))
x = F.elu(self.conv2(x))
x = F.elu(self.conv3(x))
x = F.elu(self.conv4(x))
x = x.view(-1, 32 * 3 * 3)
hx, cx = self.lstm(x, (hx, cx))
x = hx
return self.critic_linear(x), self.actor_linear(x), (hx, cx)

View File

@ -0,0 +1,46 @@
# Optimizer
import math
import torch
import torch.optim as optim
class SharedAdam(optim.Adam):
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
super(SharedAdam, self).__init__(params, lr, betas, eps, weight_decay)
for group in self.param_groups:
for p in group['params']:
state = self.state[p]
state['step'] = torch.zeros(1)
state['exp_avg'] = p.data.new().resize_as_(p.data).zero_()
state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_()
def share_memory(self):
for group in self.param_groups:
for p in group['params']:
state = self.state[p]
state['step'].share_memory_()
state['exp_avg'].share_memory_()
state['exp_avg_sq'].share_memory_()
def step(self):
loss = None
for group in self.param_groups:
for p in group['params']:
if p.grad is None:
continue
grad = p.grad.data
state = self.state[p]
exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
beta1, beta2 = group['betas']
state['step'] += 1
if group['weight_decay'] != 0:
grad = grad.add(group['weight_decay'], p.data)
exp_avg.mul_(beta1).add_(1 - beta1, grad)
exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
denom = exp_avg_sq.sqrt().add_(group['eps'])
bias_correction1 = 1 - beta1 ** state['step'][0]
bias_correction2 = 1 - beta2 ** state['step'][0]
step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
p.data.addcdiv_(-step_size, exp_avg, denom)
return loss
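# Note on step(): this reimplements the Adam update so it operates on the
# shared state tensors: m = beta1*m + (1-beta1)*g, v = beta2*v + (1-beta2)*g^2,
# then p -= lr * sqrt(1 - beta2^t) / (1 - beta1^t) * m / (sqrt(v) + eps).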

View File

@ -0,0 +1,45 @@
# Test Agent
import torch
import torch.nn.functional as F
from envs import create_atari_env
from model import ActorCritic
from torch.autograd import Variable
import time
from collections import deque
def test(rank, params, shared_model):
torch.manual_seed(params.seed + rank)
env = create_atari_env(params.env_name, video=True)
env.seed(params.seed + rank)
model = ActorCritic(env.observation_space.shape[0], env.action_space)
model.eval()
state = env.reset()
state = torch.from_numpy(state)
reward_sum = 0
done = True
start_time = time.time()
actions = deque(maxlen=100)
episode_length = 0
while True:
episode_length += 1
if done:
model.load_state_dict(shared_model.state_dict())
cx = Variable(torch.zeros(1, 256), volatile=True)
hx = Variable(torch.zeros(1, 256), volatile=True)
else:
cx = Variable(cx.data, volatile=True)
hx = Variable(hx.data, volatile=True)
value, action_value, (hx, cx) = model((Variable(state.unsqueeze(0), volatile=True), (hx, cx)))
prob = F.softmax(action_value)
action = prob.max(1)[1].data.numpy()
state, reward, done, _ = env.step(action[0])
reward_sum += reward
if done:
print("Time {}, episode reward {}, episode length {}".format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)), reward_sum, episode_length))
reward_sum = 0
episode_length = 0
actions.clear()
state = env.reset()
time.sleep(60)
state = torch.from_numpy(state)
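# Note: the test process reloads the shared weights at the start of each
# episode, acts greedily (argmax of the policy), and sleeps for 60 seconds
# after every episode so that evaluation does not starve the training
# processes.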

View File

@ -0,0 +1,77 @@
# Training the AI
import torch
import torch.nn.functional as F
from envs import create_atari_env
from model import ActorCritic
from torch.autograd import Variable
def ensure_shared_grads(model, shared_model):
for param, shared_param in zip(model.parameters(), shared_model.parameters()):
if shared_param.grad is not None:
return
shared_param._grad = param.grad
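# Note on ensure_shared_grads(): the first call binds each shared parameter's
# gradient to the local gradient tensor; on later calls the shared gradients
# already exist, so the function returns early and the binding is kept.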
def train(rank, params, shared_model, optimizer):
torch.manual_seed(params.seed + rank)
env = create_atari_env(params.env_name)
env.seed(params.seed + rank)
model = ActorCritic(env.observation_space.shape[0], env.action_space)
state = env.reset()
state = torch.from_numpy(state)
done = True
episode_length = 0
while True:
episode_length += 1
model.load_state_dict(shared_model.state_dict())
if done:
cx = Variable(torch.zeros(1, 256))
hx = Variable(torch.zeros(1, 256))
else:
cx = Variable(cx.data)
hx = Variable(hx.data)
values = []
log_probs = []
rewards = []
entropies = []
for step in range(params.num_steps):
value, action_values, (hx, cx) = model((Variable(state.unsqueeze(0)), (hx, cx)))
prob = F.softmax(action_values)
log_prob = F.log_softmax(action_values)
entropy = -(log_prob * prob).sum(1)
entropies.append(entropy)
action = prob.multinomial().data
log_prob = log_prob.gather(1, Variable(action))
values.append(value)
log_probs.append(log_prob)
state, reward, done, _ = env.step(action.numpy())
done = (done or episode_length >= params.max_episode_length)
reward = max(min(reward, 1), -1)
if done:
episode_length = 0
state = env.reset()
state = torch.from_numpy(state)
rewards.append(reward)
if done:
break
R = torch.zeros(1, 1)
if not done:
value, _, _ = model((Variable(state.unsqueeze(0)), (hx, cx)))
R = value.data
values.append(Variable(R))
policy_loss = 0
value_loss = 0
R = Variable(R)
gae = torch.zeros(1, 1)
for i in reversed(range(len(rewards))):
R = params.gamma * R + rewards[i]
advantage = R - values[i]
value_loss = value_loss + 0.5 * advantage.pow(2)
TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data
gae = gae * params.gamma * params.tau + TD
policy_loss = policy_loss - log_probs[i] * Variable(gae) - 0.01 * entropies[i]
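# Note on the loop above: TD is the one-step temporal-difference error
# delta_i = r_i + gamma*V(s_(i+1)) - V(s_i), and gae accumulates
# sum_k (gamma*tau)^k * delta_(i+k), i.e. Generalized Advantage Estimation;
# the entropy term rewards less peaked policies to encourage exploration.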
optimizer.zero_grad()
(policy_loss + 0.5 * value_loss).backward()
torch.nn.utils.clip_grad_norm(model.parameters(), 40)
ensure_shared_grads(model, shared_model)
optimizer.step()

View File

@ -0,0 +1,67 @@
# Improvement of the Gym environment with universe
import cv2
import gym
import numpy as np
from gym.spaces.box import Box
from gym import wrappers
# Taken from https://github.com/openai/universe-starter-agent
def create_atari_env(env_id, video=False):
env = gym.make(env_id)
if video:
env = wrappers.Monitor(env, 'test', force=True)
env = MyAtariRescale42x42(env)
env = MyNormalizedEnv(env)
return env
def _process_frame42(frame):
frame = frame[34:34 + 160, :160]
# Resize by half, then down to 42x42 (essentially mipmapping). If
# we resize directly we lose pixels that, when mapped to 42x42,
# aren't close enough to the pixel boundary.
frame = cv2.resize(frame, (80, 80))
frame = cv2.resize(frame, (42, 42))
frame = frame.mean(2)
frame = frame.astype(np.float32)
frame *= (1.0 / 255.0)
#frame = np.reshape(frame, [1, 42, 42])
return frame
class MyAtariRescale42x42(gym.ObservationWrapper):
def __init__(self, env=None):
super(MyAtariRescale42x42, self).__init__(env)
self.observation_space = Box(0.0, 1.0, [1, 42, 42])
def _observation(self, observation):
return _process_frame42(observation)
class MyNormalizedEnv(gym.ObservationWrapper):
def __init__(self, env=None):
super(MyNormalizedEnv, self).__init__(env)
self.state_mean = 0
self.state_std = 0
self.alpha = 0.9999
self.num_steps = 0
def _observation(self, observation):
self.num_steps += 1
self.state_mean = self.state_mean * self.alpha + \
observation.mean() * (1 - self.alpha)
self.state_std = self.state_std * self.alpha + \
observation.std() * (1 - self.alpha)
unbiased_mean = self.state_mean / (1 - pow(self.alpha, self.num_steps))
unbiased_std = self.state_std / (1 - pow(self.alpha, self.num_steps))
ret = (observation - unbiased_mean) / (unbiased_std + 1e-8)
return np.expand_dims(ret, axis=0)

View File

@ -0,0 +1,43 @@
# Main code
from __future__ import print_function
import os
import torch
import torch.multiprocessing as mp
from envs import create_atari_env
from model import ActorCritic
from train import train
from test import test
import my_optim
# Gathering all the parameters (that we can modify to explore)
class Params():
def __init__(self):
self.lr = 0.0001
self.gamma = 0.99
self.tau = 1.
self.seed = 1
self.num_processes = 16
self.num_steps = 20
self.max_episode_length = 10000
self.env_name = 'Breakout-v0'
# Main run
os.environ['OMP_NUM_THREADS'] = '1' # 1 thread per core
params = Params() # creating the params object from the Params class, that sets all the model parameters
torch.manual_seed(params.seed) # setting the seed (not essential)
env = create_atari_env(params.env_name) # we create an optimized environment thanks to universe
shared_model = ActorCritic(env.observation_space.shape[0], env.action_space) # shared_model is the model shared by the different agents (different threads in different cores)
shared_model.share_memory() # storing the model in the shared memory of the computer, which allows the threads to have access to this shared memory even if they are in different cores
optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=params.lr) # the optimizer is also shared because it acts on the shared model
optimizer.share_memory() # same, we store the optimizer in the shared memory so that all the agents can have access to this shared memory to optimize the model
processes = [] # initializing the processes with an empty list
p = mp.Process(target=test, args=(params.num_processes, params, shared_model)) # creating the 'test' process with the arguments 'args' passed to the 'test' target function - this process does not update the shared model, it only evaluates it; torch.multiprocessing.Process runs the function in an independent process
p.start() # starting the created process p
processes.append(p) # adding the created process p to the list of processes
for rank in range(0, params.num_processes): # looping to start all the training processes, each of which updates the shared model
p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
p.start()
processes.append(p)
for p in processes: # joining every process so that the program exits only once all the agents are done, allowing it to stop safely
p.join()

View File

@ -0,0 +1,65 @@
# AI for Breakout
# Importing the libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# Initializing and setting the variance of a tensor of weights
def normalized_columns_initializer(weights, std=1.0):
out = torch.randn(weights.size())
out *= std / torch.sqrt(out.pow(2).sum(1, True)) # thanks to this initialization, we have var(out) = std^2
return out
# Initializing the weights of the neural network in an optimal way for the learning
def weights_init(m):
classname = m.__class__.__name__ # python trick that will look for the type of connection in the object "m" (convolution or full connection)
if classname.find('Conv') != -1: # if the connection is a convolution
weight_shape = list(m.weight.data.size()) # list containing the shape of the weights in the object "m"
fan_in = np.prod(weight_shape[1:4]) # dim1 * dim2 * dim3
fan_out = np.prod(weight_shape[2:4]) * weight_shape[0] # dim0 * dim2 * dim3
w_bound = np.sqrt(6. / (fan_in + fan_out)) # weight bound
m.weight.data.uniform_(-w_bound, w_bound) # generating some random weights of order inversely proportional to the size of the tensor of weights
m.bias.data.fill_(0) # initializing all the bias with zeros
elif classname.find('Linear') != -1: # if the connection is a full connection
weight_shape = list(m.weight.data.size()) # list containing the shape of the weights in the object "m"
fan_in = weight_shape[1] # dim1
fan_out = weight_shape[0] # dim0
w_bound = np.sqrt(6. / (fan_in + fan_out)) # weight bound
m.weight.data.uniform_(-w_bound, w_bound) # generating some random weights of order inversely proportional to the size of the tensor of weights
m.bias.data.fill_(0) # initializing all the bias with zeros
# Making the A3C brain
class ActorCritic(torch.nn.Module):
def __init__(self, num_inputs, action_space):
super(ActorCritic, self).__init__()
self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1) # first convolution
self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1) # second convolution
self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1) # third convolution
self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1) # fourth convolution
self.lstm = nn.LSTMCell(32 * 3 * 3, 256) # making an LSTM (Long Short Term Memory) to learn the temporal properties of the input - we obtain a big encoded vector S of size 256 that encodes an event of the game
num_outputs = action_space.n # getting the number of possible actions
self.critic_linear = nn.Linear(256, 1) # full connection of the critic: output = V(S)
self.actor_linear = nn.Linear(256, num_outputs) # full connection of the actor: output = Q(S,A)
self.apply(weights_init) # initializing the weights of the model with random weights
self.actor_linear.weight.data = normalized_columns_initializer(self.actor_linear.weight.data, 0.01) # setting the standard deviation of the actor tensor of weights to 0.01
self.actor_linear.bias.data.fill_(0) # initializing the actor bias with zeros
self.critic_linear.weight.data = normalized_columns_initializer(self.critic_linear.weight.data, 1.0) # setting the standard deviation of the critic tensor of weights to 1.0
self.critic_linear.bias.data.fill_(0) # initializing the critic bias with zeros
self.lstm.bias_ih.data.fill_(0) # initializing the lstm bias with zeros
self.lstm.bias_hh.data.fill_(0) # initializing the lstm bias with zeros
self.train() # setting the module in "train" mode (this would activate dropout and batchnorm layers if the model had any)
def forward(self, inputs):
inputs, (hx, cx) = inputs # unpacking the input into the input images and the tuple (hidden states, cell states)
x = F.elu(self.conv1(inputs)) # forward propagating the signal from the input images to the 1st convolutional layer
x = F.elu(self.conv2(x)) # forward propagating the signal from the 1st convolutional layer to the 2nd convolutional layer
x = F.elu(self.conv3(x)) # forward propagating the signal from the 2nd convolutional layer to the 3rd convolutional layer
x = F.elu(self.conv4(x)) # forward propagating the signal from the 3rd convolutional layer to the 4th convolutional layer
x = x.view(-1, 32 * 3 * 3) # flattening the last convolutional layer into this 1D vector x
hx, cx = self.lstm(x, (hx, cx)) # the LSTM takes as input x and the old hidden & cell states and outputs the new hidden & cell states
x = hx # getting the useful output, which are the hidden states (principle of the LSTM)
return self.critic_linear(x), self.actor_linear(x), (hx, cx) # returning the output of the critic (V(S)), the output of the actor (Q(S,A)), and the new hidden & cell states ((hx, cx))

View File

@ -0,0 +1,50 @@
# Optimizer
import math
import torch
import torch.optim as optim
# Implementing the Adam optimizer with shared states
class SharedAdam(optim.Adam): # object that inherits from optim.Adam
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
super(SharedAdam, self).__init__(params, lr, betas, eps, weight_decay) # inheriting from the tools of optim.Adam
for group in self.param_groups: # self.param_groups contains all the attributes of the optimizer, including the parameters to optimize (the weights of the network) contained in self.param_groups['params']
for p in group['params']: # for each tensor p of weights to optimize
state = self.state[p] # at the beginning, self.state is an empty dictionary so state = {} and self.state = {p:{}} = {p: state}
state['step'] = torch.zeros(1) # counting the steps: state = {'step' : tensor([0])}
state['exp_avg'] = p.data.new().resize_as_(p.data).zero_() # the update of the adam optimizer is based on an exponential moving average of the gradient (moment 1)
state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_() # the update of the adam optimizer is also based on an exponential moving average of the squared of the gradient (moment 2)
# Sharing the memory
def share_memory(self):
for group in self.param_groups:
for p in group['params']:
state = self.state[p]
state['step'].share_memory_() # tensor.share_memory_() acts a little bit like tensor.cuda()
state['exp_avg'].share_memory_() # tensor.share_memory_() acts a little bit like tensor.cuda()
state['exp_avg_sq'].share_memory_() # tensor.share_memory_() acts a little bit like tensor.cuda()
# Performing a single optimization step of the Adam algorithm (see algorithm 1 in https://arxiv.org/pdf/1412.6980.pdf)
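# As an added reminder (not part of the original file), the per-parameter update below is:
#   exp_avg    <- beta1 * exp_avg    + (1 - beta1) * grad      (1st moment, m_t)
#   exp_avg_sq <- beta2 * exp_avg_sq + (1 - beta2) * grad^2    (2nd moment, v_t)
#   p <- p - lr * sqrt(1 - beta2^t) / (1 - beta1^t) * exp_avg / (sqrt(exp_avg_sq) + eps)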
def step(self):
loss = None
for group in self.param_groups:
for p in group['params']:
if p.grad is None:
continue
grad = p.grad.data
state = self.state[p]
exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
beta1, beta2 = group['betas']
state['step'] += 1
if group['weight_decay'] != 0:
grad = grad.add(group['weight_decay'], p.data)
exp_avg.mul_(beta1).add_(1 - beta1, grad)
exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
denom = exp_avg_sq.sqrt().add_(group['eps'])
bias_correction1 = 1 - beta1 ** state['step'][0]
bias_correction2 = 1 - beta2 ** state['step'][0]
step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
p.data.addcdiv_(-step_size, exp_avg, denom)
return loss

View File

@ -0,0 +1,46 @@
# Test Agent
import torch
import torch.nn.functional as F
from envs import create_atari_env
from model import ActorCritic
from torch.autograd import Variable
import time
from collections import deque
# Making the test agent (won't update the model but will just use the shared model to explore)
def test(rank, params, shared_model):
torch.manual_seed(params.seed + rank) # shifting the seed with rank to desynchronize the test agent from the training agents
env = create_atari_env(params.env_name, video=True) # running an environment with a video
env.seed(params.seed + rank) # aligning the seed of the environment on the seed of the agent
model = ActorCritic(env.observation_space.shape[0], env.action_space) # creating one model
model.eval() # putting the model in "eval" mode because it won't be trained
state = env.reset() # getting the input images as numpy arrays
state = torch.from_numpy(state) # converting them into torch tensors
reward_sum = 0 # initializing the sum of rewards to 0
done = True # initializing done to True
start_time = time.time() # getting the starting time to measure the computation time
actions = deque(maxlen=100) # cf https://pymotw.com/2/collections/deque.html
episode_length = 0 # initializing the episode length to 0
while True: # repeat
episode_length += 1 # incrementing the episode length by one
if done: # synchronizing with the shared model (same as train.py)
model.load_state_dict(shared_model.state_dict())
cx = Variable(torch.zeros(1, 256), volatile=True)
hx = Variable(torch.zeros(1, 256), volatile=True)
else:
cx = Variable(cx.data, volatile=True)
hx = Variable(hx.data, volatile=True)
value, action_value, (hx, cx) = model((Variable(state.unsqueeze(0), volatile=True), (hx, cx)))
prob = F.softmax(action_value)
action = prob.max(1)[1].data.numpy() # the test agent does not explore, it directly plays the best action
state, reward, done, _ = env.step(action[0]) # done = done or episode_length >= params.max_episode_length
reward_sum += reward
if done: # printing the results at the end of each part
print("Time {}, episode reward {}, episode length {}".format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start_time)), reward_sum, episode_length))
reward_sum = 0 # reinitializing the sum of rewards
episode_length = 0 # reinitializing the episode length
actions.clear() # reinitializing the actions
state = env.reset() # reinitializing the environment
time.sleep(60) # taking a one-minute break to let the other agents practice (if the game is done)
state = torch.from_numpy(state) # new state and we continue

View File

@ -0,0 +1,78 @@
# Training the AI
import torch
import torch.nn.functional as F
from envs import create_atari_env
from model import ActorCritic
from torch.autograd import Variable
# Implementing a function to make sure the models share the same gradient
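# Note (an added explanation): the first call binds this agent's local gradient tensors
# to the shared parameters; once bound (shared_param.grad is not None), later backward
# passes accumulate into those same tensors, so the function can return early.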
def ensure_shared_grads(model, shared_model):
for param, shared_param in zip(model.parameters(), shared_model.parameters()):
if shared_param.grad is not None:
return
shared_param._grad = param.grad
def train(rank, params, shared_model, optimizer):
torch.manual_seed(params.seed + rank) # shifting the seed with rank to asynchronize each training agent
env = create_atari_env(params.env_name) # creating an optimized environment thanks to the create_atari_env function
env.seed(params.seed + rank) # aligning the seed of the environment on the seed of the agent
model = ActorCritic(env.observation_space.shape[0], env.action_space) # creating the model from the ActorCritic class
state = env.reset() # state is a numpy array of size 1*42*42, in black & white
state = torch.from_numpy(state) # converting the numpy array into a torch tensor
done = True # initializing done to True so that the LSTM states are reset on the first iteration
episode_length = 0 # initializing the length of an episode to 0
while True: # repeat
episode_length += 1 # incrementing the episode length by one
model.load_state_dict(shared_model.state_dict()) # synchronizing with the shared model - the agent gets the shared model to do an exploration on num_steps
if done: # if it is the first iteration of the while loop or if the game was just done, then:
cx = Variable(torch.zeros(1, 256)) # the cell states of the LSTM are reinitialized to zero
hx = Variable(torch.zeros(1, 256)) # the hidden states of the LSTM are reinitialized to zero
else: # else:
cx = Variable(cx.data) # we keep the old cell states, making sure they are in a torch variable
hx = Variable(hx.data) # we keep the old hidden states, making sure they are in a torch variable
values = [] # initializing the list of values (V(S))
log_probs = [] # initializing the list of log probabilities
rewards = [] # initializing the list of rewards
entropies = [] # initializing the list of entropies
for step in range(params.num_steps): # going through the num_steps exploration steps
value, action_values, (hx, cx) = model((Variable(state.unsqueeze(0)), (hx, cx))) # getting from the model the output V(S) of the critic, the output Q(S,A) of the actor, and the new hidden & cell states
prob = F.softmax(action_values) # generating a distribution of probabilities of the Q-values according to the softmax: prob(a) = exp(Q(s,a))/sum_b exp(Q(s,b))
log_prob = F.log_softmax(action_values) # generating a distribution of log probabilities of the Q-values according to the log softmax: log_prob(a) = log(prob(a))
entropy = -(log_prob * prob).sum(1) # H(p) = - sum_x p(x).log(p(x))
entropies.append(entropy) # storing the computed entropy
action = prob.multinomial().data # selecting an action by taking a random draw from the prob distribution
log_prob = log_prob.gather(1, Variable(action)) # getting the log prob associated to this selected action
values.append(value) # storing the value V(S) of the state
log_probs.append(log_prob) # storing the log prob of the action
state, reward, done, _ = env.step(action.numpy()) # playing the selected action, reaching the new state, and getting the new reward
done = (done or episode_length >= params.max_episode_length) # if the episode lasts too long (the agent is stuck), then it is done
reward = max(min(reward, 1), -1) # clamping the reward between -1 and +1
if done: # if the episode is done:
episode_length = 0 # reinitializing the episode length
state = env.reset() # we restart the environment
state = torch.from_numpy(state) # tensorizing the new state
rewards.append(reward) # storing the new observed reward
if done: # if we are done
break # we stop the exploration and we directly move on to the next step: the update of the shared model
R = torch.zeros(1, 1) # initializing the cumulative reward
if not done: # if we are not done:
value, _, _ = model((Variable(state.unsqueeze(0)), (hx, cx))) # we initialize the cumulative reward with the value of the last shared state
R = value.data # storing this value as the starting point of the cumulative reward
values.append(Variable(R)) # storing the value V(S) of the last reached state S
policy_loss = 0 # initializing the policy loss
value_loss = 0 # initializing the value loss
R = Variable(R) # making sure the cumulative reward R is a torch Variable
gae = torch.zeros(1, 1) # initializing the Generalized Advantage Estimation to 0
for i in reversed(range(len(rewards))): # starting from the last exploration step and going back in time
R = params.gamma * R + rewards[i] # R = gamma*R + r_t = r_0 + gamma r_1 + gamma^2 * r_2 ... + gamma^(n-1)*r_(n-1) + gamma^nb_step * V(last_state)
advantage = R - values[i] # R is an estimator of Q at time t = i so advantage_i = Q_i - V(state_i) = R - value[i]
value_loss = value_loss + 0.5 * advantage.pow(2) # computing the value loss
TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data # computing the temporal difference
gae = gae * params.gamma * params.tau + TD # gae = sum_i (gamma*tau)^i * TD(i) with gae_i = gae_(i+1)*gamma*tau + (r_i + gamma*V(state_i+1) - V(state_i))
policy_loss = policy_loss - log_probs[i] * Variable(gae) - 0.01 * entropies[i] # computing the policy loss
optimizer.zero_grad() # reinitializing the gradients of the optimizer to zero before backpropagation
(policy_loss + 0.5 * value_loss).backward() # we give 2x more importance to the policy loss than the value loss because the policy loss is smaller
torch.nn.utils.clip_grad_norm(model.parameters(), 40) # clipping the norm of the gradients to at most 40 to prevent the gradients from taking huge values and degenerating the algorithm
ensure_shared_grads(model, shared_model) # making sure the model of the agent and the shared model share the same gradient
optimizer.step() # running the optimization step

View File

@ -0,0 +1 @@
.tox

View File

@ -0,0 +1,39 @@
*.swp
*.pyc
*.py~
.DS_Store
.cache
.pytest_cache/
# Setuptools distribution and build folders.
/dist/
/build
# Virtualenv
/env
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
*.sublime-project
*.sublime-workspace
logs/
.ipynb_checkpoints
ghostdriver.log
junk
MUJOCO_LOG.txt
rllab_mujoco
tutorial/*.html
# IDE files
.eggs
.tox
# PyCharm project files
.idea
vizdoom.ini

View File

@ -0,0 +1,21 @@
sudo: required
language: python
services:
- docker
env:
# - UBUNTU_VER=14.04 - problems with atari-py
- UBUNTU_VER=16.04
- UBUNTU_VER=18.04
install: "" # so travis doesn't do pip install requirements.txt
script:
- docker build -f test.dockerfile.${UBUNTU_VER} -t gym-test --build-arg MUJOCO_KEY=$MUJOCO_KEY .
- docker run -e MUJOCO_KEY=$MUJOCO_KEY gym-test tox
deploy:
provider: pypi
username: $TWINE_USERNAME
password: $TWINE_PASSWORD
on:
tags: true
condition: $UBUNTU_VER = 16.04

View File

@ -0,0 +1,13 @@
OpenAI Gym is dedicated to providing a harassment-free experience for
everyone, regardless of gender, gender identity and expression, sexual
orientation, disability, physical appearance, body size, age, race, or
religion. We do not tolerate harassment of participants in any form.
This code of conduct applies to all OpenAI Gym spaces (including Gist
comments) both online and off. Anyone who violates this code of
conduct may be sanctioned or expelled from these spaces at the
discretion of the OpenAI team.
We may add additional rules over time, which will be made clearly
available to participants. Participants are responsible for knowing
and abiding by these rules.

View File

@ -0,0 +1,36 @@
# gym
The MIT License
Copyright (c) 2016 OpenAI (https://openai.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
# Mujoco models
This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license:
```
This file is part of MuJoCo.
Copyright 2009-2015 Roboti LLC.
Mujoco :: Advanced physics simulation engine
Source : www.roboti.us
Version : 1.31
Released : 23Apr16
Author :: Vikash Kumar
Contacts : kumar@roboti.us
```

View File

@ -0,0 +1,24 @@
.PHONY: install test
install:
pip install -r requirements.txt
base:
docker pull ubuntu:14.04
docker tag ubuntu:14.04 quay.io/openai/gym:base
docker push quay.io/openai/gym:base
test:
docker build -f test.dockerfile -t quay.io/openai/gym:test .
docker push quay.io/openai/gym:test
upload:
rm -rf dist
python setup.py sdist
twine upload dist/*
docker-build:
docker build -t quay.io/openai/gym .
docker-run:
docker run -ti quay.io/openai/gym bash

View File

@ -0,0 +1,352 @@
**Status:** Maintenance (expect bug fixes and minor updates)
OpenAI Gym
**********
**OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to a standardized set of environments.
.. image:: https://travis-ci.org/openai/gym.svg?branch=master
:target: https://travis-ci.org/openai/gym
`See What's New section below <#what-s-new>`_
``gym`` makes no assumptions about the structure of your agent, and is compatible with any numerical computation library, such as TensorFlow or Theano. You can use it from Python code, and soon from other languages.
If you're not sure where to start, we recommend beginning with the
`docs <https://gym.openai.com/docs>`_ on our site. See also the `FAQ <https://github.com/openai/gym/wiki/FAQ>`_.
A whitepaper for OpenAI Gym is available at http://arxiv.org/abs/1606.01540, and here's a BibTeX entry that you can use to cite it in a publication::
@misc{1606.01540,
Author = {Greg Brockman and Vicki Cheung and Ludwig Pettersson and Jonas Schneider and John Schulman and Jie Tang and Wojciech Zaremba},
Title = {OpenAI Gym},
Year = {2016},
Eprint = {arXiv:1606.01540},
}
.. contents:: **Contents of this document**
:depth: 2
Basics
======
There are two basic concepts in reinforcement learning: the
environment (namely, the outside world) and the agent (namely, the
algorithm you are writing). The agent sends `actions` to the
environment, and the environment replies with `observations` and
`rewards` (that is, a score).
The core `gym` interface is `Env <https://github.com/openai/gym/blob/master/gym/core.py>`_, which is
the unified environment interface. There is no interface for agents;
that part is left to you. The following are the ``Env`` methods you
should know (a minimal usage sketch follows the list):
- `reset(self)`: Reset the environment's state. Returns `observation`.
- `step(self, action)`: Step the environment by one timestep. Returns `observation`, `reward`, `done`, `info`.
- `render(self, mode='human')`: Render one frame of the environment. The default mode will do something human friendly, such as pop up a window.
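Putting these together, a minimal interaction loop looks like the following sketch (assuming the bundled ``CartPole-v0`` environment; any installed environment id works):

.. code:: python

    import gym

    env = gym.make('CartPole-v0')
    observation = env.reset()
    for _ in range(1000):
        env.render()
        action = env.action_space.sample()  # a random agent: sample a valid action
        observation, reward, done, info = env.step(action)
        if done:
            observation = env.reset()  # the episode ended, start a new one
    env.close()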
Installation
============
You can perform a minimal install of ``gym`` with:
.. code:: shell
git clone https://github.com/openai/gym.git
cd gym
pip install -e .
If you prefer, you can do a minimal install of the packaged version directly from PyPI:
.. code:: shell
pip install gym
You'll be able to run a few environments right away:
- algorithmic
- toy_text
- classic_control (you'll need ``pyglet`` to render though)
We recommend playing with those environments at first, and then later
installing the dependencies for the remaining environments.
Installing everything
---------------------
To install the full set of environments, you'll need to have some system
packages installed. We'll build out the list here over time; please let us know
what you end up installing on your platform. Also, take a look at the docker files (test.dockerfile.xx.xx) to
see the composition of our CI-tested images.
On OSX:
.. code:: shell
brew install cmake boost boost-python sdl2 swig wget
On Ubuntu 14.04 (non-mujoco only):
.. code:: shell
apt-get install libjpeg-dev cmake swig python-pyglet python3-opengl libboost-all-dev \
libsdl2-2.0.0 libsdl2-dev libglu1-mesa libglu1-mesa-dev libgles2-mesa-dev \
freeglut3 xvfb libav-tools
On Ubuntu 16.04:
.. code:: shell
apt-get install -y python-pyglet python3-opengl zlib1g-dev libjpeg-dev patchelf \
cmake swig libboost-all-dev libsdl2-dev libosmesa6-dev xvfb ffmpeg
On Ubuntu 18.04:
.. code:: shell
apt install -y python3-dev zlib1g-dev libjpeg-dev cmake swig python-pyglet python3-opengl libboost-all-dev libsdl2-dev \
libosmesa6-dev patchelf ffmpeg xvfb
MuJoCo has a proprietary dependency we can't set up for you. Follow
the
`instructions <https://github.com/openai/mujoco-py#obtaining-the-binaries-and-license-key>`_
in the ``mujoco-py`` package for help.
Once you're ready to install everything, run ``pip install -e '.[all]'`` (or ``pip install 'gym[all]'``).
Supported systems
-----------------
We currently support Linux and OS X running Python 2.7 or 3.5. Some users on OSX + Python3 may need to run
.. code:: shell
brew install boost-python --with-python3
If you want to access Gym from languages other than python, we have limited support for non-python
frameworks, such as lua/Torch, using the OpenAI Gym `HTTP API <https://github.com/openai/gym-http-api>`_.
Pip version
-----------
To run ``pip install -e '.[all]'``, you'll need a semi-recent pip.
Please make sure your pip is at least at version ``1.5.0``. You can
upgrade using the following: ``pip install --ignore-installed
pip``. Alternatively, you can open `setup.py
<https://github.com/openai/gym/blob/master/setup.py>`_ and
install the dependencies by hand.
Rendering on a server
---------------------
If you're trying to render video on a server, you'll need to connect a
fake display. The easiest way to do this is by running under
``xvfb-run`` (on Ubuntu, install the ``xvfb`` package):
.. code:: shell
xvfb-run -s "-screen 0 1400x900x24" bash
Installing dependencies for specific environments
-------------------------------------------------
If you'd like to install the dependencies for only specific
environments, see `setup.py
<https://github.com/openai/gym/blob/master/setup.py>`_. We
maintain the lists of dependencies on a per-environment group basis.
Environments
============
The code for each environment group is housed in its own subdirectory
`gym/envs
<https://github.com/openai/gym/blob/master/gym/envs>`_. The
specification of each task is in `gym/envs/__init__.py
<https://github.com/openai/gym/blob/master/gym/envs/__init__.py>`_. It's
worth browsing through both.
Algorithmic
-----------
These are a variety of algorithmic tasks, such as learning to copy a
sequence.
.. code:: python
import gym
env = gym.make('Copy-v0')
env.reset()
env.render()
Atari
-----
The Atari environments are a variety of Atari video games. If you didn't do the full install, you can install dependencies via ``pip install -e '.[atari]'`` (you'll need ``cmake`` installed) and then get started as follows:
.. code:: python
import gym
env = gym.make('SpaceInvaders-v0')
env.reset()
env.render()
This will install ``atari-py``, which automatically compiles the `Arcade Learning Environment <http://www.arcadelearningenvironment.org/>`_. This can take quite a while (a few minutes on a decent laptop), so just be prepared.
Box2d
-----------
Box2d is a 2D physics engine. You can install it via ``pip install -e '.[box2d]'`` and then get started as follows:
.. code:: python
import gym
env = gym.make('LunarLander-v2')
env.reset()
env.render()
Classic control
---------------
These are a variety of classic control tasks, which would appear in a typical reinforcement learning textbook. If you didn't do the full install, you will need to run ``pip install -e '.[classic_control]'`` to enable rendering. You can get started with them via:
.. code:: python
import gym
env = gym.make('CartPole-v0')
env.reset()
env.render()
MuJoCo
------
`MuJoCo <http://www.mujoco.org/>`_ is a physics engine which can do
very detailed efficient simulations with contacts. It's not
open-source, so you'll have to follow the instructions in `mujoco-py
<https://github.com/openai/mujoco-py#obtaining-the-binaries-and-license-key>`_
to set it up. You'll have to also run ``pip install -e '.[mujoco]'`` if you didn't do the full install.
.. code:: python
import gym
env = gym.make('Humanoid-v2')
env.reset()
env.render()
Robotics
--------
`MuJoCo <http://www.mujoco.org/>`_ is a physics engine which can do
very detailed efficient simulations with contacts and we use it for all robotics environments. It's not
open-source, so you'll have to follow the instructions in `mujoco-py
<https://github.com/openai/mujoco-py#obtaining-the-binaries-and-license-key>`_
to set it up. You'll have to also run ``pip install -e '.[robotics]'`` if you didn't do the full install.
.. code:: python
import gym
env = gym.make('HandManipulateBlock-v0')
env.reset()
env.render()
You can also find additional details in the accompanying `technical report <https://arxiv.org/abs/1802.09464>`_ and `blog post <https://blog.openai.com/ingredients-for-robotics-research/>`_.
If you use these environments, you can cite them as follows::
@misc{1802.09464,
Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba},
Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research},
Year = {2018},
Eprint = {arXiv:1802.09464},
}
Toy text
--------
Toy environments which are text-based. There's no extra dependency to install, so to get started, you can just do:
.. code:: python
import gym
env = gym.make('FrozenLake-v0')
env.reset()
env.render()
Examples
========
See the ``examples`` directory.
- Run `examples/agents/random_agent.py <https://github.com/openai/gym/blob/master/examples/agents/random_agent.py>`_ to run a simple random agent.
- Run `examples/agents/cem.py <https://github.com/openai/gym/blob/master/examples/agents/cem.py>`_ to run an actual learning agent (using the cross-entropy method).
- Run `examples/scripts/list_envs <https://github.com/openai/gym/blob/master/examples/scripts/list_envs>`_ to generate a list of all environments.
Testing
=======
We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
.. code:: shell
pytest
.. _See What's New section below:
What's new
==========
- 2018-02-28: Release of a set of new robotics environments.
- 2018-01-25: Made some aesthetic improvements and removed unmaintained parts of gym. This may seem like a downgrade in functionality, but it is actually a long-needed cleanup in preparation for some great new things that will be released in the next month.
+ Now your `Env` and `Wrapper` subclasses should define `step`, `reset`, `render`, `close`, `seed` rather than underscored method names.
+ Removed the `board_game`, `debugging`, `safety`, `parameter_tuning` environments since they're not being maintained by us at OpenAI. We encourage authors and users to create new repositories for these environments.
+ Changed `MultiDiscrete` action space to range from `[0, ..., n-1]` rather than `[a, ..., b-1]`.
+ No more `render(close=True)`, use env-specific methods to close the rendering.
+ Removed `scoreboard` directory, since site doesn't exist anymore.
+ Moved `gym/monitoring` to `gym/wrappers/monitoring`
+ Add `dtype` to `Space`.
+ Not using python's built-in `logging` module anymore, using `gym.logger`
- 2018-01-24: All continuous control environments now use mujoco_py >= 1.50.
Versions have been updated accordingly to -v2, e.g. HalfCheetah-v2. Performance
should be similar (see https://github.com/openai/gym/pull/834) but there are likely
some differences due to changes in MuJoCo.
- 2017-06-16: Make env.spec into a property to fix a bug that occurs
when you try to print out an unregistered Env.
- 2017-05-13: BACKWARDS INCOMPATIBILITY: The Atari environments are now at
*v4*. To keep using the old v3 environments, keep gym <= 0.8.2 and atari-py
<= 0.0.21. Note that the v4 environments will not give identical results to
existing v3 results, although differences are minor. The v4 environments
incorporate the latest Arcade Learning Environment (ALE), including several
ROM fixes, and now handle loading and saving of the emulator state. While
seeds still ensure determinism, the effect of any given seed is not preserved
across this upgrade because the random number generator in ALE has changed.
The `*NoFrameSkip-v4` environments should be considered the canonical Atari
environments from now on.
- 2017-03-05: BACKWARDS INCOMPATIBILITY: The `configure` method has been removed
from `Env`. `configure` was not used by `gym`, but was used by some dependent
libraries including `universe`. These libraries will migrate away from the
configure method by using wrappers instead. This change is on master and will be released with 0.8.0.
- 2016-12-27: BACKWARDS INCOMPATIBILITY: The gym monitor is now a
wrapper. Rather than starting monitoring as
`env.monitor.start(directory)`, envs are now wrapped as follows:
`env = wrappers.Monitor(env, directory)`. This change is on master
and will be released with 0.7.0.
- 2016-11-1: Several experimental changes to how a running monitor interacts
with environments. The monitor will now raise an error if reset() is called
when the env has not returned done=True. The monitor will only record complete
episodes where done=True. Finally, the monitor no longer calls seed() on the
underlying env, nor does it record or upload seed information.
- 2016-10-31: We're experimentally expanding the environment ID format
to include an optional username.
- 2016-09-21: Switch the Gym automated logger setup to configure the
root logger rather than just the 'gym' logger.
- 2016-08-17: Calling `close` on an env will also close the monitor
and any rendering windows.
- 2016-08-17: The monitor will no longer write manifest files in
real-time, unless `write_upon_reset=True` is passed.
- 2016-05-28: For controlled reproducibility, envs now support seeding
(cf #91 and #135). The monitor records which seeds are used. We will
soon add seed information to the display on the scoreboard.

View File

@ -0,0 +1,26 @@
#!/bin/bash
# This script is the entrypoint for our Docker image.
set -ex
# Set up display; otherwise rendering will fail
Xvfb -screen 0 1024x768x24 &
export DISPLAY=:0
# Wait for the file to come up
display=0
file="/tmp/.X11-unix/X$display"
for i in $(seq 1 10); do
if [ -e "$file" ]; then
break
fi
echo "Waiting for $file to be created (try $i/10)"
sleep "$i"
done
if ! [ -e "$file" ]; then
echo "Timing out: $file was not created"
exit 1
fi
exec "$@"

View File

@ -0,0 +1,21 @@
#!/usr/bin/env python3
import argparse
import gym
parser = argparse.ArgumentParser(description='Renders a Gym environment for quick inspection.')
parser.add_argument('env_id', type=str, help='the ID of the environment to be rendered (e.g. HalfCheetah-v1)')
parser.add_argument('--step', type=int, default=1)
args = parser.parse_args()
env = gym.make(args.env_id)
env.reset()
step = 0
while True:
if args.step:
env.step(env.action_space.sample())
env.render()
if step % 10 == 0:
env.reset()
step += 1

View File

@ -0,0 +1,33 @@
# Agents
An "agent" describes the method of running an RL algorithm against an environment in the gym. The agent may contain the algorithm itself or simply provide an integration between an algorithm and the gym environments.
## RandomAgent
A sample agent located in this repo at `gym/examples/agents/random_agent.py`. This simple agent leverages the environment's ability to produce a random valid action and does so for each step.
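A minimal sketch of that pattern, using the 4-tuple `step` API these docs assume:

```python
import gym

env = gym.make('CartPole-v0')  # any registered environment id works here
ob = env.reset()
done = False
while not done:
    # the environment supplies a random valid action at each step
    ob, reward, done, info = env.step(env.action_space.sample())
env.close()
```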
## cem.py
A generic Cross-Entropy agent located in this repo at `gym/examples/agents/cem.py`. This agent defaults to 10 iterations of 25 episodes considering the top 20% "elite".
## dqn
This is a very basic DQN (with experience replay) implementation, which uses OpenAI's gym environment and Keras/Theano neural networks. [/sherjilozair/dqn](https://github.com/sherjilozair/dqn)
## Simple DQN
Simple, fast and easy to extend DQN implementation using [Neon](https://github.com/NervanaSystems/neon) deep learning library. Comes with out-of-box tools to train, test and visualize models. For details see [this blog post](https://www.nervanasys.com/deep-reinforcement-learning-with-neon/) or check out the [repo](https://github.com/tambetm/simple_dqn).
## AgentNet
A library that allows you to develop custom deep/convolutional/recurrent reinforcement learning agents with full integration with Theano/Lasagne. Also contains a toolkit for various reinforcement learning algorithms, policies, memory augmentations, etc.
- The repo's here: [AgentNet](https://github.com/yandexdataschool/AgentNet)
- [A step-by-step demo for Atari SpaceInvaders ](https://github.com/yandexdataschool/AgentNet/blob/master/examples/Playing%20Atari%20with%20Deep%20Reinforcement%20Learning%20%28OpenAI%20Gym%29.ipynb)
## rllab
A framework for developing and evaluating reinforcement learning algorithms, fully compatible with OpenAI Gym. It includes a wide range of continuous control tasks plus implementations of many algorithms. [/rllab/rllab](https://github.com/rllab/rllab)
## [keras-rl](https://github.com/matthiasplappert/keras-rl)
[keras-rl](https://github.com/matthiasplappert/keras-rl) implements some state-of-the art deep reinforcement learning algorithms. It was built with OpenAI Gym in mind, and also built on top of the deep learning library [Keras](https://keras.io/) and utilises similar design patterns like callbacks and user-definable metrics.

View File

@ -0,0 +1,48 @@
# Environments
The gym comes prepackaged with many, many environments. It's this common API around many environments that makes the gym so great. Here we list additional environments that do not come prepackaged with the gym. Submit another to this list via a pull-request.
_**NOTICE**: It's possible that in time OpenAI will develop a full-fledged repository of supplemental environments. Until then this bit of markdown will suffice._
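Once such a package registers its environments, the same `make`/`reset`/`step` calls work unchanged; a hedged sketch follows, where `gym_foo` and `Foo-v0` are placeholder names standing in for any of the packages below:

```python
import gym
import gym_foo  # hypothetical package: importing it registers its environment ids with gym

env = gym.make('Foo-v0')  # placeholder id registered by the hypothetical package
ob = env.reset()
ob, reward, done, info = env.step(env.action_space.sample())
env.close()
```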
## PGE: Parallel Game Engine
PGE is a FOSS 3D engine for AI simulations, and can interoperate with the Gym. Contains environments with modern 3D graphics, and uses Bullet for physics.
Learn more here: https://github.com/222464/PGE
## gym-inventory: Inventory Control Environments
gym-inventory is a single agent domain featuring discrete state and action spaces that an AI agent might encounter in inventory control problems.
Learn more here: https://github.com/paulhendricks/gym-inventory
## gym-gazebo: training Robots in Gazebo
gym-gazebo presents an extension of the initial OpenAI gym for robotics using ROS and Gazebo, an advanced 3D modeling and
rendering tool.
Learn more here: https://github.com/erlerobot/gym-gazebo/
## gym-maze: 2D maze environment
A simple 2D maze environment where an agent finds its way from the start position to the goal.
Learn more here: https://github.com/tuzzer/gym-maze/
## gym-minigrid: Minimalistic Gridworld Environment
A minimalistic gridworld environment. Seeks to minimize software dependencies, be easy to extend and deliver good performance for faster training.
Learn more here: https://github.com/maximecb/gym-minigrid
## gym-sokoban: 2D Transportation Puzzles
The environment consists of transportation puzzles in which the player's goal is to push all boxes onto the warehouse's storage locations.
The advantage of the environment is that it generates a new random level every time it is initialized or reset, which prevents overfitting to predefined levels.
Learn more here: https://github.com/mpSchrader/gym-sokoban
## gym-duckietown: Lane-Following Simulator for Duckietown
A lane-following simulator built for the [Duckietown](http://duckietown.org/) project (small-scale self-driving car course).
Learn more here: https://github.com/duckietown/gym-duckietown

View File

@ -0,0 +1,7 @@
# Miscellaneous
Here we have a bunch of tools, libs, apis, tutorials, resources, etc. provided by the community to add value to the gym ecosystem.
## OpenAIGym.jl
Convenience wrapper of the OpenAI Gym for the Julia language [/tbreloff/OpenAIGym.jl](https://github.com/tbreloff/OpenAIGym.jl)

View File

@ -0,0 +1,7 @@
# Table of Contents
- [Agents](agents.md) contains a listing of agents compatible with gym environments. Agents facilitate the running of an algorithm against an environment.
- [Environments](environments.md) lists more environments to run your algorithms against. These do not come prepackaged with the gym.
- [Miscellaneous](misc.md) is a collection of other value-add tools and utilities. These could be anything from a small convenience lib to a collection of video tutorials or a new language binding.

View File

@ -0,0 +1,19 @@
# Support code for cem.py
class BinaryActionLinearPolicy(object):
def __init__(self, theta):
self.w = theta[:-1]
self.b = theta[-1]
def act(self, ob):
y = ob.dot(self.w) + self.b # linear score of the observation
a = int(y < 0) # binary action: 1 if the score is negative, 0 otherwise
return a
class ContinuousActionLinearPolicy(object):
def __init__(self, theta, n_in, n_out):
assert len(theta) == (n_in + 1) * n_out
self.W = theta[0 : n_in * n_out].reshape(n_in, n_out)
self.b = theta[n_in * n_out : None].reshape(1, n_out)
def act(self, ob):
a = ob.dot(self.W) + self.b
return a

View File

@ -0,0 +1,93 @@
from __future__ import print_function
import gym
from gym import wrappers, logger
import numpy as np
from six.moves import cPickle as pickle
import json, sys, os
from os import path
from _policies import BinaryActionLinearPolicy # Different file so it can be unpickled
import argparse
def cem(f, th_mean, batch_size, n_iter, elite_frac, initial_std=1.0):
"""
Generic implementation of the cross-entropy method for maximizing a black-box function
f: a function mapping from vector -> scalar
th_mean: initial mean over input distribution
batch_size: number of samples of theta to evaluate per batch
n_iter: number of batches
elite_frac: each batch, select this fraction of the top-performing samples
initial_std: initial standard deviation over parameter vectors
"""
n_elite = int(np.round(batch_size*elite_frac))
th_std = np.ones_like(th_mean) * initial_std
for _ in range(n_iter):
ths = np.array([th_mean + dth for dth in th_std[None,:]*np.random.randn(batch_size, th_mean.size)])
ys = np.array([f(th) for th in ths])
elite_inds = ys.argsort()[::-1][:n_elite]
elite_ths = ths[elite_inds]
th_mean = elite_ths.mean(axis=0)
th_std = elite_ths.std(axis=0)
yield {'ys' : ys, 'theta_mean' : th_mean, 'y_mean' : ys.mean()}
def do_rollout(agent, env, num_steps, render=False):
total_rew = 0
ob = env.reset()
for t in range(num_steps):
a = agent.act(ob)
(ob, reward, done, _info) = env.step(a)
total_rew += reward
if render and t%3==0: env.render()
if done: break
return total_rew, t+1
if __name__ == '__main__':
logger.set_level(logger.INFO)
parser = argparse.ArgumentParser()
parser.add_argument('--display', action='store_true')
parser.add_argument('target', nargs="?", default="CartPole-v0")
args = parser.parse_args()
env = gym.make(args.target)
env.seed(0)
np.random.seed(0)
params = dict(n_iter=10, batch_size=25, elite_frac = 0.2)
num_steps = 200
# You provide the directory to write to (can be an existing
# directory, but it can't contain previous monitor results). You can
# also dump to a tempdir if you'd like: tempfile.mkdtemp().
outdir = '/tmp/cem-agent-results'
env = wrappers.Monitor(env, outdir, force=True)
# Prepare snapshotting
# ----------------------------------------
def writefile(fname, s):
with open(path.join(outdir, fname), 'w') as fh: fh.write(s)
info = {}
info['params'] = params
info['argv'] = sys.argv
info['env_id'] = env.spec.id
# ------------------------------------------
def noisy_evaluation(theta):
agent = BinaryActionLinearPolicy(theta)
rew, T = do_rollout(agent, env, num_steps)
return rew
# Train the agent, and snapshot each stage
for (i, iterdata) in enumerate(
cem(noisy_evaluation, np.zeros(env.observation_space.shape[0]+1), **params)):
print('Iteration %2i. Episode mean reward: %7.3f'%(i, iterdata['y_mean']))
agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
if args.display: do_rollout(agent, env, 200, render=True)
writefile('agent-%.4i.pkl'%i, str(pickle.dumps(agent, -1)))
# Write out the env at the end so we store the parameters of this
# environment.
writefile('info.json', json.dumps(info))
env.close()

View File

@ -0,0 +1,80 @@
#!/usr/bin/env python
from __future__ import print_function
import sys, gym, time
#
# Test yourself as a learning agent! Pass environment name as a command-line argument, for example:
#
# python keyboard_agent.py SpaceInvadersNoFrameskip-v4
#
env = gym.make('LunarLander-v2' if len(sys.argv)<2 else sys.argv[1])
if not hasattr(env.action_space, 'n'):
raise Exception('Keyboard agent only supports discrete action spaces')
ACTIONS = env.action_space.n
SKIP_CONTROL = 0 # Use previous control decision SKIP_CONTROL times, that's how you
# can test what skip is still usable.
human_agent_action = 0
human_wants_restart = False
human_sets_pause = False
def key_press(key, mod):
global human_agent_action, human_wants_restart, human_sets_pause
if key==0xff0d: human_wants_restart = True
if key==32: human_sets_pause = not human_sets_pause
a = int( key - ord('0') )
if a <= 0 or a >= ACTIONS: return
human_agent_action = a
def key_release(key, mod):
global human_agent_action
a = int( key - ord('0') )
if a <= 0 or a >= ACTIONS: return
if human_agent_action == a:
human_agent_action = 0
env.render()
env.unwrapped.viewer.window.on_key_press = key_press
env.unwrapped.viewer.window.on_key_release = key_release
def rollout(env):
global human_agent_action, human_wants_restart, human_sets_pause
human_wants_restart = False
obser = env.reset()
skip = 0
total_reward = 0
total_timesteps = 0
while 1:
if not skip:
#print("taking action {}".format(human_agent_action))
a = human_agent_action
total_timesteps += 1
skip = SKIP_CONTROL
else:
skip -= 1
obser, r, done, info = env.step(a)
if r != 0:
print("reward %0.3f" % r)
total_reward += r
window_still_open = env.render()
if window_still_open==False: return False
if done: break
if human_wants_restart: break
while human_sets_pause:
env.render()
time.sleep(0.1)
time.sleep(0.1)
print("timesteps %i reward %0.2f" % (total_timesteps, total_reward))
print("ACTIONS={}".format(ACTIONS))
print("Press keys 1 2 3 ... to take actions 1 2 3 ...")
print("No keys pressed is taking action 0")
while 1:
window_still_open = rollout(env)
if window_still_open==False: break

View File

@ -0,0 +1,51 @@
import argparse
import sys
import gym
from gym import wrappers, logger
class RandomAgent(object):
"""The world's simplest agent!"""
def __init__(self, action_space):
self.action_space = action_space
def act(self, observation, reward, done):
return self.action_space.sample()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description=None)
parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run')
args = parser.parse_args()
# You can set the level to logger.DEBUG or logger.WARN if you
# want to change the amount of output.
logger.set_level(logger.INFO)
env = gym.make(args.env_id)
# You provide the directory to write to (can be an existing
# directory, including one with existing data -- all monitor files
# will be namespaced). You can also dump to a tempdir if you'd
# like: tempfile.mkdtemp().
outdir = '/tmp/random-agent-results'
env = wrappers.Monitor(env, directory=outdir, force=True)
env.seed(0)
agent = RandomAgent(env.action_space)
episode_count = 100
reward = 0
done = False
for i in range(episode_count):
ob = env.reset()
while True:
action = agent.act(ob, reward, done)
ob, reward, done, _ = env.step(action)
if done:
break
# Note there's no env.render() here. But the environment still can open window and
# render if asked by env.monitor: it calls env.render('rgb_array') to record video.
# Video is not recorded every episode, see capped_cubic_video_schedule for details.
# Close the env and write monitor result info to disk
env.close()

View File

@ -0,0 +1,80 @@
#!/usr/bin/env python
#
# Run all the tasks on a benchmark using a random agent.
#
# This script assumes you have set an OPENAI_GYM_API_KEY environment
# variable. You can find your API key in the web interface:
# https://gym.openai.com/settings/profile.
#
import argparse
import logging
import os
import sys
import gym
# In modules, use `logger = logging.getLogger(__name__)`
from gym import wrappers
from gym.scoreboard.scoring import benchmark_score_from_local
import openai_benchmark
logger = logging.getLogger()
def main():
parser = argparse.ArgumentParser(description=None)
parser.add_argument('-b', '--benchmark-id', help='id of benchmark to run e.g. Atari7Ram-v0')
parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
parser.add_argument('-f', '--force', action='store_true', dest='force', default=False)
parser.add_argument('-t', '--training-dir', default="/tmp/gym-results", help='What directory to upload.')
args = parser.parse_args()
if args.verbosity == 0:
logger.setLevel(logging.INFO)
elif args.verbosity >= 1:
logger.setLevel(logging.DEBUG)
benchmark_id = args.benchmark_id
if benchmark_id is None:
logger.info("Must supply a valid benchmark")
return 1
try:
benchmark = gym.benchmark_spec(benchmark_id)
except Exception:
logger.info("Invalid benchmark")
return 1
# run benchmark tasks
for task in benchmark.tasks:
logger.info("Running on env: {}".format(task.env_id))
for trial in range(task.trials):
env = gym.make(task.env_id)
training_dir_name = "{}/{}-{}".format(args.training_dir, task.env_id, trial)
env = wrappers.Monitor(env, training_dir_name, video_callable=False, force=args.force)
env.reset()
for _ in range(task.max_timesteps):
o, r, done, _ = env.step(env.action_space.sample())
if done:
env.reset()
env.close()
logger.info("""Computing statistics for this benchmark run...
{{
score: {score},
num_envs_solved: {num_envs_solved},
summed_training_seconds: {summed_training_seconds},
start_to_finish_seconds: {start_to_finish_seconds},
}}
""".rstrip().format(**benchmark_score_from_local(benchmark_id, args.training_dir)))
logger.info("""Done running, upload results using the following command:
python -c "import gym; gym.upload('{}', benchmark_id='{}', algorithm_id='(unknown)')"
""".rstrip().format(args.training_dir, benchmark_id))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,5 @@
#!/usr/bin/env python
from gym import envs
envids = [spec.id for spec in envs.registry.all()]
for envid in sorted(envids):
print(envid)

View File

@ -0,0 +1,64 @@
#!/usr/bin/env python
import gym
from gym import spaces, envs
import argparse
import numpy as np
import itertools
import time
parser = argparse.ArgumentParser()
parser.add_argument("env")
parser.add_argument("--mode", choices=["noop", "random", "static", "human"],
default="random")
parser.add_argument("--max_steps", type=int, default=0)
parser.add_argument("--fps",type=float)
parser.add_argument("--once", action="store_true")
parser.add_argument("--ignore_done", action="store_true")
args = parser.parse_args()
env = envs.make(args.env)
ac_space = env.action_space
fps = args.fps or env.metadata.get('video.frames_per_second') or 100
if args.max_steps == 0: args.max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
while True:
env.reset()
env.render(mode='human')
print("Starting a new trajectory")
for t in range(args.max_steps) if args.max_steps else itertools.count():
done = False
if args.mode == "noop":
if isinstance(ac_space, spaces.Box):
a = np.zeros(ac_space.shape)
elif isinstance(ac_space, spaces.Discrete):
a = 0
else:
raise NotImplementedError("noop not implemented for class {}".format(type(ac_space)))
_, _, done, _ = env.step(a)
time.sleep(1.0/fps)
elif args.mode == "random":
a = ac_space.sample()
_, _, done, _ = env.step(a)
time.sleep(1.0/fps)
elif args.mode == "static":
time.sleep(1.0/fps)
elif args.mode == "human":
a = raw_input("type action from {0,...,%i} and press enter: "%(ac_space.n-1))
try:
a = int(a)
except ValueError:
print("WARNING: ignoring illegal action '{}'.".format(a))
a = 0
if a >= ac_space.n:
print("WARNING: ignoring illegal action {}.".format(a))
a = 0
_, _, done, _ = env.step(a)
env.render()
if done and not args.ignore_done: break
print("Done after {} steps".format(t+1))
if args.once:
break
else:
raw_input("Press enter to continue")

View File

@ -0,0 +1,14 @@
import distutils.version
import os
import sys
import warnings
from gym import error
from gym.utils import reraise
from gym.version import VERSION as __version__
from gym.core import Env, GoalEnv, Space, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
from gym.envs import make, spec
from gym import logger
__all__ = ["Env", "Space", "Wrapper", "make", "spec"]

View File

@ -0,0 +1,343 @@
from gym import logger
import gym
from gym import error
from gym.utils import closer
env_closer = closer.Closer()
# Env-related abstractions
class Env(object):
"""The main OpenAI Gym class. It encapsulates an environment with
arbitrary behind-the-scenes dynamics. An environment can be
partially or fully observed.
The main API methods that users of this class need to know are:
step
reset
render
close
seed
And set the following attributes:
action_space: The Space object corresponding to valid actions
observation_space: The Space object corresponding to valid observations
reward_range: A tuple corresponding to the min and max possible rewards
Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.
The methods are accessed publicly as "step", "reset", etc. The
non-underscored versions are wrapper methods to which we may add
functionality over time.
"""
# Set this in SOME subclasses
metadata = {'render.modes': []}
reward_range = (-float('inf'), float('inf'))
spec = None
# Set these in ALL subclasses
action_space = None
observation_space = None
def step(self, action):
"""Run one timestep of the environment's dynamics. When end of
episode is reached, you are responsible for calling `reset()`
to reset this environment's state.
Accepts an action and returns a tuple (observation, reward, done, info).
Args:
action (object): an action provided by the agent
Returns:
observation (object): agent's observation of the current environment
reward (float) : amount of reward returned after previous action
done (boolean): whether the episode has ended, in which case further step() calls will return undefined results
info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
"""
raise NotImplementedError
def reset(self):
"""Resets the state of the environment and returns an initial observation.
Returns: observation (object): the initial observation of the
space.
"""
raise NotImplementedError
def render(self, mode='human'):
"""Renders the environment.
The set of supported modes varies per environment. (And some
environments do not support rendering at all.) By convention,
if mode is:
- human: render to the current display or terminal and
return nothing. Usually for human consumption.
- rgb_array: Return a numpy.ndarray with shape (x, y, 3),
representing RGB values for an x-by-y pixel image, suitable
for turning into a video.
- ansi: Return a string (str) or StringIO.StringIO containing a
terminal-style text representation. The text can include newlines
and ANSI escape sequences (e.g. for colors).
Note:
Make sure that your class's metadata 'render.modes' key includes
the list of supported modes. It's recommended to call super()
in implementations to use the functionality of this method.
Args:
mode (str): the mode to render with
Example:
class MyEnv(Env):
metadata = {'render.modes': ['human', 'rgb_array']}
def render(self, mode='human'):
if mode == 'rgb_array':
return np.array(...) # return RGB frame suitable for video
elif mode == 'human':
... # pop up a window and render
else:
super(MyEnv, self).render(mode=mode) # just raise an exception
"""
raise NotImplementedError
def close(self):
"""Override _close in your subclass to perform any necessary cleanup.
Environments will automatically close() themselves when
garbage collected or when the program exits.
"""
return
def seed(self, seed=None):
"""Sets the seed for this env's random number generator(s).
Note:
Some environments use multiple pseudorandom number generators.
We want to capture all such seeds used in order to ensure that
there aren't accidental correlations between multiple generators.
Returns:
list<bigint>: Returns the list of seeds used in this env's random
number generators. The first value in the list should be the
"main" seed, or the value which a reproducer should pass to
'seed'. Often, the main seed equals the provided 'seed', but
this won't be true if seed=None, for example.
"""
logger.warn("Could not seed environment %s", self)
return
@property
def unwrapped(self):
"""Completely unwrap this env.
Returns:
gym.Env: The base non-wrapped gym.Env instance
"""
return self
def __str__(self):
if self.spec is None:
return '<{} instance>'.format(type(self).__name__)
else:
return '<{}<{}>>'.format(type(self).__name__, self.spec.id)
class GoalEnv(Env):
"""A goal-based environment. It functions just as any regular OpenAI Gym environment but it
imposes a required structure on the observation_space. More concretely, the observation
space is required to contain at least three elements, namely `observation`, `desired_goal`, and
`achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve.
`achieved_goal` is the goal that the agent has currently achieved instead. `observation` contains the
actual observations of the environment as per usual.
"""
def reset(self):
# Enforce that each GoalEnv uses a Goal-compatible observation space.
if not isinstance(self.observation_space, gym.spaces.Dict):
raise error.Error('GoalEnv requires an observation space of type gym.spaces.Dict')
result = super(GoalEnv, self).reset()
for key in ['observation', 'achieved_goal', 'desired_goal']:
if key not in result:
raise error.Error('GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(key))
return result
def compute_reward(self, achieved_goal, desired_goal, info):
"""Compute the step reward. This externalizes the reward function and makes
it dependent on a desired goal and the one that was achieved. If you wish to include
additional rewards that are independent of the goal, you can include the necessary values
to derive it in info and compute it accordingly.
Args:
achieved_goal (object): the goal that was achieved during execution
desired_goal (object): the desired goal that we asked the agent to attempt to achieve
info (dict): an info dictionary with additional information
Returns:
float: The reward that corresponds to the provided achieved goal w.r.t. to the desired
goal. Note that the following should always hold true:
ob, reward, done, info = env.step(action)
assert reward == env.compute_reward(ob['achieved_goal'], ob['desired_goal'], info)
"""
raise NotImplementedError()
# Space-related abstractions
class Space(object):
"""Defines the observation and action spaces, so you can write generic
code that applies to any Env. For example, you can choose a random
action.
"""
def __init__(self, shape=None, dtype=None):
import numpy as np # takes about 300-400ms to import, so we load lazily
self.shape = None if shape is None else tuple(shape)
self.dtype = None if dtype is None else np.dtype(dtype)
def sample(self):
"""
Uniformly randomly sample a random element of this space
"""
raise NotImplementedError
def contains(self, x):
"""
Return boolean specifying if x is a valid
member of this space
"""
raise NotImplementedError
__contains__ = contains
def to_jsonable(self, sample_n):
"""Convert a batch of samples from this space to a JSONable data type."""
# By default, assume identity is JSONable
return sample_n
def from_jsonable(self, sample_n):
"""Convert a JSONable data type to a batch of samples from this space."""
# By default, assume identity is JSONable
return sample_n
warn_once = True
def deprecated_warn_once(text):
global warn_once
if not warn_once: return
warn_once = False
logger.warn(text)
class Wrapper(Env):
env = None
def __init__(self, env):
self.env = env
self.action_space = self.env.action_space
self.observation_space = self.env.observation_space
self.reward_range = self.env.reward_range
self.metadata = self.env.metadata
@classmethod
def class_name(cls):
return cls.__name__
def step(self, action):
if hasattr(self, "_step"):
deprecated_warn_once("%s doesn't implement 'step' method, but it implements deprecated '_step' method." % type(self))
self.step = self._step
return self.step(action)
else:
deprecated_warn_once("%s doesn't implement 'step' method, " % type(self) +
"which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
return self.env.step(action)
def reset(self, **kwargs):
if hasattr(self, "_reset"):
deprecated_warn_once("%s doesn't implement 'reset' method, but it implements deprecated '_reset' method." % type(self))
self.reset = self._reset
return self._reset(**kwargs)
else:
deprecated_warn_once("%s doesn't implement 'reset' method, " % type(self) +
"which is required for wrappers derived directly from Wrapper. Deprecated default implementation is used.")
return self.env.reset(**kwargs)
def render(self, mode='human', **kwargs):
return self.env.render(mode, **kwargs)
def close(self):
if self.env:
return self.env.close()
def seed(self, seed=None):
return self.env.seed(seed)
def compute_reward(self, achieved_goal, desired_goal, info):
return self.env.compute_reward(achieved_goal, desired_goal, info)
def __str__(self):
return '<{}{}>'.format(type(self).__name__, self.env)
def __repr__(self):
return str(self)
@property
def unwrapped(self):
return self.env.unwrapped
@property
def spec(self):
return self.env.spec
class ObservationWrapper(Wrapper):
def step(self, action):
observation, reward, done, info = self.env.step(action)
return self.observation(observation), reward, done, info
def reset(self, **kwargs):
observation = self.env.reset(**kwargs)
return self.observation(observation)
def observation(self, observation):
deprecated_warn_once("%s doesn't implement 'observation' method. Maybe it implements deprecated '_observation' method." % type(self))
return self._observation(observation)
class RewardWrapper(Wrapper):
def reset(self):
return self.env.reset()
def step(self, action):
observation, reward, done, info = self.env.step(action)
return observation, self.reward(reward), done, info
def reward(self, reward):
deprecated_warn_once("%s doesn't implement 'reward' method. Maybe it implements deprecated '_reward' method." % type(self))
return self._reward(reward)
class ActionWrapper(Wrapper):
def step(self, action):
action = self.action(action)
return self.env.step(action)
def reset(self):
return self.env.reset()
def action(self, action):
deprecated_warn_once("%s doesn't implement 'action' method. Maybe it implements deprecated '_action' method." % type(self))
return self._action(action)
def reverse_action(self, action):
deprecated_warn_once("%s doesn't implement 'reverse_action' method. Maybe it implements deprecated '_reverse_action' method." % type(self))
return self._reverse_action(action)
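# Example (sketch): a minimal custom wrapper built from the classes above.
# `ClipReward` is hypothetical, for illustration only:
#
#     class ClipReward(RewardWrapper):
#         def reward(self, reward):
#             return max(-1.0, min(1.0, reward))
#
#     env = ClipReward(gym.make('CartPole-v0'))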

View File

@ -0,0 +1,113 @@
# Envs
These are the core integrated environments. Note that we may later
restructure any of the files, but will keep the environments available
at the relevant package's top-level. So for example, you should access
`AntEnv` as follows:
```
# Will be supported in future releases
from gym.envs import mujoco
mujoco.AntEnv
```
Rather than:
```
# May break in future releases
from gym.envs.mujoco import ant
ant.AntEnv
```
## How to create new environments for Gym
* Create a new repo called gym-foo, which should also be a PIP package.
* A good example is https://github.com/openai/gym-soccer.
* It should have at least the following files:
```sh
gym-foo/
README.md
setup.py
gym_foo/
__init__.py
envs/
__init__.py
foo_env.py
foo_extrahard_env.py
```
* `gym-foo/setup.py` should have:
```python
from setuptools import setup
setup(name='gym_foo',
version='0.0.1',
install_requires=['gym'] # And any other dependencies foo needs
)
```
* `gym-foo/gym_foo/__init__.py` should have:
```python
from gym.envs.registration import register
register(
id='foo-v0',
entry_point='gym_foo.envs:FooEnv',
)
register(
id='foo-extrahard-v0',
entry_point='gym_foo.envs:FooExtraHardEnv',
)
```
* `gym-foo/gym_foo/envs/__init__.py` should have:
```python
from gym_foo.envs.foo_env import FooEnv
from gym_foo.envs.foo_extrahard_env import FooExtraHardEnv
```
* `gym-foo/gym_foo/envs/foo_env.py` should look something like:
```python
import gym
from gym import error, spaces, utils
from gym.utils import seeding
class FooEnv(gym.Env):
metadata = {'render.modes': ['human']}
def __init__(self):
...
def step(self, action):
...
def reset(self):
...
def render(self, mode='human', close=False):
...
```
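After installing the package (e.g. with `pip install -e gym-foo`), the new environments can be created like any built-in one. A minimal usage sketch, assuming the registrations above:
```python
import gym
import gym_foo  # importing the package runs the register() calls above

env = gym.make('foo-v0')
observation = env.reset()
```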
## How to add new environments to Gym, within this repo (not recommended for new environments)
1. Write your environment in an existing collection or a new collection. All collections are subfolders of `/gym/envs`.
2. Import your environment into the `__init__.py` file of the collection. This file will be located at `/gym/envs/my_collection/__init__.py`. Add `from gym.envs.my_collection.my_awesome_env import MyEnv` to this file.
3. Register your env in `/gym/envs/__init__.py`:
```
register(
id='MyEnv-v0',
entry_point='gym.envs.my_collection:MyEnv',
)
```
4. Add your environment to the scoreboard in `/gym/scoreboard/__init__.py`:
```
add_task(
id='MyEnv-v0',
summary="Super cool environment",
group='my_collection',
contributor='mygithubhandle',
)
```

View File

@ -0,0 +1,533 @@
from gym.envs.registration import registry, register, make, spec
# Algorithmic
# ----------------------------------------
register(
id='Copy-v0',
entry_point='gym.envs.algorithmic:CopyEnv',
max_episode_steps=200,
reward_threshold=25.0,
)
register(
id='RepeatCopy-v0',
entry_point='gym.envs.algorithmic:RepeatCopyEnv',
max_episode_steps=200,
reward_threshold=75.0,
)
register(
id='ReversedAddition-v0',
entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
kwargs={'rows' : 2},
max_episode_steps=200,
reward_threshold=25.0,
)
register(
id='ReversedAddition3-v0',
entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
kwargs={'rows' : 3},
max_episode_steps=200,
reward_threshold=25.0,
)
register(
id='DuplicatedInput-v0',
entry_point='gym.envs.algorithmic:DuplicatedInputEnv',
max_episode_steps=200,
reward_threshold=9.0,
)
register(
id='Reverse-v0',
entry_point='gym.envs.algorithmic:ReverseEnv',
max_episode_steps=200,
reward_threshold=25.0,
)
# Classic
# ----------------------------------------
register(
id='CartPole-v0',
entry_point='gym.envs.classic_control:CartPoleEnv',
max_episode_steps=200,
reward_threshold=195.0,
)
register(
id='CartPole-v1',
entry_point='gym.envs.classic_control:CartPoleEnv',
max_episode_steps=500,
reward_threshold=475.0,
)
register(
id='MountainCar-v0',
entry_point='gym.envs.classic_control:MountainCarEnv',
max_episode_steps=200,
reward_threshold=-110.0,
)
register(
id='MountainCarContinuous-v0',
entry_point='gym.envs.classic_control:Continuous_MountainCarEnv',
max_episode_steps=999,
reward_threshold=90.0,
)
register(
id='Pendulum-v0',
entry_point='gym.envs.classic_control:PendulumEnv',
max_episode_steps=200,
)
register(
id='Acrobot-v1',
entry_point='gym.envs.classic_control:AcrobotEnv',
max_episode_steps=500,
)
# Box2d
# ----------------------------------------
register(
id='LunarLander-v2',
entry_point='gym.envs.box2d:LunarLander',
max_episode_steps=1000,
reward_threshold=200,
)
register(
id='LunarLanderContinuous-v2',
entry_point='gym.envs.box2d:LunarLanderContinuous',
max_episode_steps=1000,
reward_threshold=200,
)
register(
id='BipedalWalker-v2',
entry_point='gym.envs.box2d:BipedalWalker',
max_episode_steps=1600,
reward_threshold=300,
)
register(
id='BipedalWalkerHardcore-v2',
entry_point='gym.envs.box2d:BipedalWalkerHardcore',
max_episode_steps=2000,
reward_threshold=300,
)
register(
id='CarRacing-v0',
entry_point='gym.envs.box2d:CarRacing',
max_episode_steps=1000,
reward_threshold=900,
)
# Toy Text
# ----------------------------------------
register(
id='Blackjack-v0',
entry_point='gym.envs.toy_text:BlackjackEnv',
)
register(
id='KellyCoinflip-v0',
entry_point='gym.envs.toy_text:KellyCoinflipEnv',
reward_threshold=246.61,
)
register(
id='KellyCoinflipGeneralized-v0',
entry_point='gym.envs.toy_text:KellyCoinflipGeneralizedEnv',
)
register(
id='FrozenLake-v0',
entry_point='gym.envs.toy_text:FrozenLakeEnv',
kwargs={'map_name' : '4x4'},
max_episode_steps=100,
reward_threshold=0.78, # optimum = .8196
)
register(
id='FrozenLake8x8-v0',
entry_point='gym.envs.toy_text:FrozenLakeEnv',
kwargs={'map_name' : '8x8'},
max_episode_steps=200,
reward_threshold=0.99, # optimum = 1
)
register(
id='CliffWalking-v0',
entry_point='gym.envs.toy_text:CliffWalkingEnv',
)
register(
id='NChain-v0',
entry_point='gym.envs.toy_text:NChainEnv',
max_episode_steps=1000,
)
register(
id='Roulette-v0',
entry_point='gym.envs.toy_text:RouletteEnv',
max_episode_steps=100,
)
register(
id='Taxi-v2',
entry_point='gym.envs.toy_text.taxi:TaxiEnv',
reward_threshold=8, # optimum = 8.46
max_episode_steps=200,
)
register(
id='GuessingGame-v0',
entry_point='gym.envs.toy_text.guessing_game:GuessingGame',
max_episode_steps=200,
)
register(
id='HotterColder-v0',
entry_point='gym.envs.toy_text.hotter_colder:HotterColder',
max_episode_steps=200,
)
# Mujoco
# ----------------------------------------
# 2D
register(
id='Reacher-v2',
entry_point='gym.envs.mujoco:ReacherEnv',
max_episode_steps=50,
reward_threshold=-3.75,
)
register(
id='Pusher-v2',
entry_point='gym.envs.mujoco:PusherEnv',
max_episode_steps=100,
reward_threshold=0.0,
)
register(
id='Thrower-v2',
entry_point='gym.envs.mujoco:ThrowerEnv',
max_episode_steps=100,
reward_threshold=0.0,
)
register(
id='Striker-v2',
entry_point='gym.envs.mujoco:StrikerEnv',
max_episode_steps=100,
reward_threshold=0.0,
)
register(
id='InvertedPendulum-v2',
entry_point='gym.envs.mujoco:InvertedPendulumEnv',
max_episode_steps=1000,
reward_threshold=950.0,
)
register(
id='InvertedDoublePendulum-v2',
entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
max_episode_steps=1000,
reward_threshold=9100.0,
)
register(
id='HalfCheetah-v2',
entry_point='gym.envs.mujoco:HalfCheetahEnv',
max_episode_steps=1000,
reward_threshold=4800.0,
)
register(
id='Hopper-v2',
entry_point='gym.envs.mujoco:HopperEnv',
max_episode_steps=1000,
reward_threshold=3800.0,
)
register(
id='Swimmer-v2',
entry_point='gym.envs.mujoco:SwimmerEnv',
max_episode_steps=1000,
reward_threshold=360.0,
)
register(
id='Walker2d-v2',
max_episode_steps=1000,
entry_point='gym.envs.mujoco:Walker2dEnv',
)
register(
id='Ant-v2',
entry_point='gym.envs.mujoco:AntEnv',
max_episode_steps=1000,
reward_threshold=6000.0,
)
register(
id='Humanoid-v2',
entry_point='gym.envs.mujoco:HumanoidEnv',
max_episode_steps=1000,
)
register(
id='HumanoidStandup-v2',
entry_point='gym.envs.mujoco:HumanoidStandupEnv',
max_episode_steps=1000,
)
# Robotics
# ----------------------------------------
def _merge(a, b):
a.update(b)
return a
for reward_type in ['sparse', 'dense']:
suffix = 'Dense' if reward_type == 'dense' else ''
kwargs = {
'reward_type': reward_type,
}
# Fetch
register(
id='FetchSlide{}-v1'.format(suffix),
entry_point='gym.envs.robotics:FetchSlideEnv',
kwargs=kwargs,
max_episode_steps=50,
)
register(
id='FetchPickAndPlace{}-v1'.format(suffix),
entry_point='gym.envs.robotics:FetchPickAndPlaceEnv',
kwargs=kwargs,
max_episode_steps=50,
)
register(
id='FetchReach{}-v1'.format(suffix),
entry_point='gym.envs.robotics:FetchReachEnv',
kwargs=kwargs,
max_episode_steps=50,
)
register(
id='FetchPush{}-v1'.format(suffix),
entry_point='gym.envs.robotics:FetchPushEnv',
kwargs=kwargs,
max_episode_steps=50,
)
# Hand
register(
id='HandReach{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandReachEnv',
kwargs=kwargs,
max_episode_steps=50,
)
register(
id='HandManipulateBlockRotateZ{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'z'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateBlockRotateParallel{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'parallel'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateBlockRotateXYZ{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateBlockFull{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
# Alias for "Full"
register(
id='HandManipulateBlock{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandBlockEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateEggRotate{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandEggEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulateEggFull{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandEggEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
# Alias for "Full"
register(
id='HandManipulateEgg{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandEggEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulatePenRotate{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandPenEnv',
kwargs=_merge({'target_position': 'ignore', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
register(
id='HandManipulatePenFull{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandPenEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
# Alias for "Full"
register(
id='HandManipulatePen{}-v0'.format(suffix),
entry_point='gym.envs.robotics:HandPenEnv',
kwargs=_merge({'target_position': 'random', 'target_rotation': 'xyz'}, kwargs),
max_episode_steps=100,
)
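    # Each pass of the reward_type loop above registers both a sparse variant
    # (suffix '') and a dense-reward variant (suffix 'Dense'), e.g.
    # 'FetchReach-v1' and 'FetchReachDense-v1'.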
# Atari
# ----------------------------------------
# # print ', '.join(["'{}'".format(name.split('.')[0]) for name in atari_py.list_games()])
for game in ['air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis',
'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival',
'centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk',
'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar',
'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kangaroo', 'krull', 'kung_fu_master',
'montezuma_revenge', 'ms_pacman', 'name_this_game', 'phoenix', 'pitfall', 'pong', 'pooyan',
'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing',
'solaris', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down',
'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']:
for obs_type in ['image', 'ram']:
# space_invaders should yield SpaceInvaders-v0 and SpaceInvaders-ram-v0
name = ''.join([g.capitalize() for g in game.split('_')])
if obs_type == 'ram':
name = '{}-ram'.format(name)
nondeterministic = False
if game == 'elevator_action' and obs_type == 'ram':
# ElevatorAction-ram-v0 seems to yield slightly
# non-deterministic observations about 10% of the time. We
# should track this down eventually, but for now we just
# mark it as nondeterministic.
nondeterministic = True
register(
id='{}-v0'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'repeat_action_probability': 0.25},
max_episode_steps=10000,
nondeterministic=nondeterministic,
)
register(
id='{}-v4'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type},
max_episode_steps=100000,
nondeterministic=nondeterministic,
)
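        # Relative to -v0 above, the -v4 variant omits repeat_action_probability,
        # so it uses the AtariEnv default of 0. (no sticky actions).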
# Standard Deterministic (as in the original DeepMind paper)
if game == 'space_invaders':
frameskip = 3
else:
frameskip = 4
# Use a deterministic frame skip.
register(
id='{}Deterministic-v0'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip, 'repeat_action_probability': 0.25},
max_episode_steps=100000,
nondeterministic=nondeterministic,
)
register(
id='{}Deterministic-v4'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': frameskip},
max_episode_steps=100000,
nondeterministic=nondeterministic,
)
register(
id='{}NoFrameskip-v0'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1, 'repeat_action_probability': 0.25}, # A frameskip of 1 means we get every frame
max_episode_steps=frameskip * 100000,
nondeterministic=nondeterministic,
)
# No frameskip. (Atari has no entropy source, so these are
# deterministic environments.)
register(
id='{}NoFrameskip-v4'.format(name),
entry_point='gym.envs.atari:AtariEnv',
kwargs={'game': game, 'obs_type': obs_type, 'frameskip': 1}, # A frameskip of 1 means we get every frame
max_episode_steps=frameskip * 100000,
nondeterministic=nondeterministic,
)
# Unit test
# ---------
register(
id='CubeCrash-v0',
entry_point='gym.envs.unittest:CubeCrash',
reward_threshold=0.9,
)
register(
id='CubeCrashSparse-v0',
entry_point='gym.envs.unittest:CubeCrashSparse',
reward_threshold=0.9,
)
register(
id='CubeCrashScreenBecomesBlack-v0',
entry_point='gym.envs.unittest:CubeCrashScreenBecomesBlack',
reward_threshold=0.9,
)
register(
id='MemorizeDigits-v0',
entry_point='gym.envs.unittest:MemorizeDigits',
reward_threshold=20,
)
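# Example (sketch): any id registered above can now be instantiated, e.g.
#
#     import gym
#     env = gym.make('CartPole-v1')
#
# max_episode_steps and reward_threshold are recorded on the registry spec
# rather than on the environment class itself.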

View File

@ -0,0 +1,5 @@
from gym.envs.algorithmic.copy_ import CopyEnv
from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv
from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv
from gym.envs.algorithmic.reverse import ReverseEnv
from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv

View File

@ -0,0 +1,326 @@
"""
Algorithmic environments have the following traits in common:
- A 1-d "input tape" or 2-d "input grid" of characters
- A target string which is a deterministic function of the input characters
Agents control a read head that moves over the input tape. Observations consist
of the single character currently under the read head. The read head may fall
off the end of the tape in any direction. When this happens, agents will observe
a special blank character (with index=env.base) until they get back in bounds.
Actions consist of 3 sub-actions:
- Direction to move the read head (left or right, plus up and down for 2-d envs)
- Whether to write to the output tape
- Which character to write (ignored if the above sub-action is 0)
An episode ends when:
- The agent writes the full target string to the output tape.
- The agent writes an incorrect character.
- The agent runs out the clock. (The time limit is fairly conservative.)
Reward schedule:
write a correct character: +1
write a wrong character: -.5
run out the clock: -1
otherwise: 0
In the beginning, input strings will be fairly short. After an environment has
been consistently solved over some window of episodes, the environment will
increase the average length of generated strings. Typical env specs require
leveling up many times to reach their reward threshold.
"""
from gym import Env, logger
from gym.spaces import Discrete, Tuple
from gym.utils import colorize, seeding
import numpy as np
from six import StringIO
import sys
import math
class AlgorithmicEnv(Env):
metadata = {'render.modes': ['human', 'ansi']}
# Only 'promote' the length of generated input strings if the worst of the
# last n episodes was no more than this far from the maximum reward
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -1.0
def __init__(self, base=10, chars=False, starting_min_length=2):
"""
base: Number of distinct characters.
chars: If True, use uppercase alphabet. Otherwise, digits. Only affects
rendering.
starting_min_length: Minimum input string length. Ramps up as episodes
are consistently solved.
"""
self.base = base
# Keep track of this many past episodes
self.last = 10
# Cumulative reward earned this episode
self.episode_total_reward = None
# Running tally of reward shortfalls. e.g. if there were 10 points to earn and
# we got 8, we'd append -2
AlgorithmicEnv.reward_shortfalls = []
if chars:
self.charmap = [chr(ord('A')+i) for i in range(base)]
else:
self.charmap = [str(i) for i in range(base)]
self.charmap.append(' ')
# TODO: Not clear why this is a class variable rather than instance.
# Could lead to some spooky action at a distance if someone is working
# with multiple algorithmic envs at once. Also makes testing tricky.
AlgorithmicEnv.min_length = starting_min_length
# Three sub-actions:
        # 1. Move read head left or right (or up/down)
# 2. Write or not
# 3. Which character to write. (Ignored if should_write=0)
self.action_space = Tuple(
[Discrete(len(self.MOVEMENTS)), Discrete(2), Discrete(self.base)]
)
# Can see just what is on the input tape (one of n characters, or nothing)
self.observation_space = Discrete(self.base + 1)
self.seed()
self.reset()
@classmethod
def _movement_idx(kls, movement_name):
return kls.MOVEMENTS.index(movement_name)
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _get_obs(self, pos=None):
"""Return an observation corresponding to the given read head position
(or the current read head position, if none is given)."""
        raise NotImplementedError
def _get_str_obs(self, pos=None):
ret = self._get_obs(pos)
return self.charmap[ret]
def _get_str_target(self, pos):
"""Return the ith character of the target string (or " " if index
out of bounds)."""
if pos < 0 or len(self.target) <= pos:
return " "
else:
return self.charmap[self.target[pos]]
def render_observation(self):
"""Return a string representation of the input tape/grid."""
raise NotImplementedError
def render(self, mode='human'):
outfile = StringIO() if mode == 'ansi' else sys.stdout
inp = "Total length of input instance: %d, step: %d\n" % (self.input_width, self.time)
outfile.write(inp)
x, y, action = self.read_head_position, self.write_head_position, self.last_action
if action is not None:
inp_act, out_act, pred = action
outfile.write("=" * (len(inp) - 1) + "\n")
y_str = "Output Tape : "
target_str = "Targets : "
if action is not None:
pred_str = self.charmap[pred]
x_str = self.render_observation()
for i in range(-2, len(self.target) + 2):
target_str += self._get_str_target(i)
if i < y - 1:
y_str += self._get_str_target(i)
elif i == (y - 1):
if action is not None and out_act == 1:
color = 'green' if pred == self.target[i] else 'red'
y_str += colorize(pred_str, color, highlight=True)
else:
y_str += self._get_str_target(i)
outfile.write(x_str)
outfile.write(y_str + "\n")
outfile.write(target_str + "\n\n")
if action is not None:
outfile.write("Current reward : %.3f\n" % self.last_reward)
outfile.write("Cumulative reward : %.3f\n" % self.episode_total_reward)
move = self.MOVEMENTS[inp_act]
outfile.write("Action : Tuple(move over input: %s,\n" % move)
out_act = out_act == 1
outfile.write(" write to the output tape: %s,\n" % out_act)
outfile.write(" prediction: %s)\n" % pred_str)
else:
outfile.write("\n" * 5)
return outfile
@property
def input_width(self):
return len(self.input_data)
def step(self, action):
assert self.action_space.contains(action)
self.last_action = action
inp_act, out_act, pred = action
done = False
reward = 0.0
self.time += 1
assert 0 <= self.write_head_position
if out_act == 1:
try:
correct = pred == self.target[self.write_head_position]
except IndexError:
logger.warn("It looks like you're calling step() even though this "+
"environment has already returned done=True. You should always call "+
"reset() once you receive done=True. Any further steps are undefined "+
"behaviour.")
correct = False
if correct:
reward = 1.0
else:
# Bail as soon as a wrong character is written to the tape
reward = -0.5
done = True
self.write_head_position += 1
if self.write_head_position >= len(self.target):
done = True
self._move(inp_act)
if self.time > self.time_limit:
reward = -1.0
done = True
obs = self._get_obs()
self.last_reward = reward
self.episode_total_reward += reward
return (obs, reward, done, {})
@property
def time_limit(self):
"""If an agent takes more than this many timesteps, end the episode
immediately and return a negative reward."""
# (Seemingly arbitrary)
return self.input_width + len(self.target) + 4
def _check_levelup(self):
"""Called between episodes. Update our running record of episode rewards
and, if appropriate, 'level up' minimum input length."""
if self.episode_total_reward is None:
# This is before the first episode/call to reset(). Nothing to do
return
AlgorithmicEnv.reward_shortfalls.append(self.episode_total_reward - len(self.target))
AlgorithmicEnv.reward_shortfalls = AlgorithmicEnv.reward_shortfalls[-self.last:]
if len(AlgorithmicEnv.reward_shortfalls) == self.last and \
min(AlgorithmicEnv.reward_shortfalls) >= self.MIN_REWARD_SHORTFALL_FOR_PROMOTION and \
AlgorithmicEnv.min_length < 30:
AlgorithmicEnv.min_length += 1
AlgorithmicEnv.reward_shortfalls = []
def reset(self):
self._check_levelup()
self.last_action = None
self.last_reward = 0
self.read_head_position = self.READ_HEAD_START
self.write_head_position = 0
self.episode_total_reward = 0.0
self.time = 0
length = self.np_random.randint(3) + AlgorithmicEnv.min_length
self.input_data = self.generate_input_data(length)
self.target = self.target_from_input_data(self.input_data)
return self._get_obs()
    def generate_input_data(self, size):
        raise NotImplementedError
    def target_from_input_data(self, input_data):
        raise NotImplementedError("Subclasses must implement")
    def _move(self, movement):
        raise NotImplementedError
class TapeAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 1-d input tape."""
MOVEMENTS = ['left', 'right']
READ_HEAD_START = 0
def _move(self, movement):
named = self.MOVEMENTS[movement]
self.read_head_position += 1 if named == 'right' else -1
def _get_obs(self, pos=None):
if pos is None:
pos = self.read_head_position
if pos < 0:
return self.base
if isinstance(pos, np.ndarray):
pos = pos.item()
try:
return self.input_data[pos]
except IndexError:
return self.base
def generate_input_data(self, size):
return [self.np_random.randint(self.base) for _ in range(size)]
def render_observation(self):
x = self.read_head_position
x_str = "Observation Tape : "
for i in range(-2, self.input_width + 2):
if i == x:
x_str += colorize(self._get_str_obs(np.array([i])), 'green', highlight=True)
else:
x_str += self._get_str_obs(np.array([i]))
x_str += "\n"
return x_str
class GridAlgorithmicEnv(AlgorithmicEnv):
"""An algorithmic env with a 2-d input grid."""
MOVEMENTS = ['left', 'right', 'up', 'down']
READ_HEAD_START = (0, 0)
def __init__(self, rows, *args, **kwargs):
self.rows = rows
AlgorithmicEnv.__init__(self, *args, **kwargs)
def _move(self, movement):
named = self.MOVEMENTS[movement]
x, y = self.read_head_position
if named == 'left':
x -= 1
elif named == 'right':
x += 1
elif named == 'up':
y -= 1
elif named == 'down':
y += 1
else:
raise ValueError("Unrecognized direction: {}".format(named))
self.read_head_position = x, y
def generate_input_data(self, size):
return [
[self.np_random.randint(self.base) for _ in range(self.rows)]
for __ in range(size)
]
def _get_obs(self, pos=None):
if pos is None:
pos = self.read_head_position
x, y = pos
if any(idx < 0 for idx in pos):
return self.base
try:
return self.input_data[x][y]
except IndexError:
return self.base
def render_observation(self):
x = self.read_head_position
label = "Observation Grid : "
x_str = ""
for j in range(-1, self.rows+1):
if j != -1:
x_str += " " * len(label)
for i in range(-2, self.input_width + 2):
if i == x[0] and j == x[1]:
x_str += colorize(self._get_str_obs((i, j)), 'green', highlight=True)
else:
x_str += self._get_str_obs((i, j))
x_str += "\n"
x_str = label + x_str
return x_str

View File

@ -0,0 +1,13 @@
"""
Task is to copy content from the input tape to
the output tape. http://arxiv.org/abs/1511.07275
"""
from gym.envs.algorithmic import algorithmic_env
class CopyEnv(algorithmic_env.TapeAlgorithmicEnv):
def __init__(self, base=5, chars=True):
super(CopyEnv, self).__init__(base=base, chars=chars)
def target_from_input_data(self, input_data):
return input_data

View File

@ -0,0 +1,24 @@
"""
Task is to return every nth character from the input tape.
http://arxiv.org/abs/1511.07275
"""
from __future__ import division
from gym.envs.algorithmic import algorithmic_env
class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
def __init__(self, duplication=2, base=5):
self.duplication = duplication
super(DuplicatedInputEnv, self).__init__(base=base, chars=True)
def generate_input_data(self, size):
res = []
if size < self.duplication:
size = self.duplication
for i in range(size//self.duplication):
char = self.np_random.randint(self.base)
for _ in range(self.duplication):
res.append(char)
return res
def target_from_input_data(self, input_data):
return [input_data[i] for i in range(0, len(input_data), self.duplication)]

View File

@ -0,0 +1,15 @@
"""
Task is to copy content multiple times from the input tape to
the output tape. http://arxiv.org/abs/1511.07275
"""
from gym.envs.algorithmic import algorithmic_env
class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
def __init__(self, base=5):
super(RepeatCopyEnv, self).__init__(base=base, chars=True)
self.last = 50
def target_from_input_data(self, input_data):
return input_data + list(reversed(input_data)) + input_data

View File

@ -0,0 +1,15 @@
"""
Task is to reverse content over the input tape.
http://arxiv.org/abs/1511.07275
"""
from gym.envs.algorithmic import algorithmic_env
class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
def __init__(self, base=2):
super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1)
self.last = 50
def target_from_input_data(self, input_str):
return list(reversed(input_str))

View File

@ -0,0 +1,30 @@
from __future__ import division
import numpy as np
from gym.envs.algorithmic import algorithmic_env
class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
def __init__(self, rows=2, base=3):
super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False)
def target_from_input_data(self, input_strings):
        carry = 0
        target = []
        for digits in input_strings:
            total = sum(digits) + carry
            target.append(total % self.base)
            carry = total // self.base
        if carry > 0:
            target.append(carry)
return target
@property
def time_limit(self):
# Quirk preserved for the sake of consistency: add the length of the input
# rather than the length of the desired output (which may differ if there's
# an extra carried digit).
# TODO: It seems like this time limit is so strict as to make Addition3-v0
# unsolvable, since agents aren't even given enough time steps to look at
# all the digits. (The solutions on the scoreboard seem to only work by
# save-scumming.)
return self.input_width*2 + 4

View File

@ -0,0 +1,239 @@
from gym.envs import algorithmic as alg
import unittest
# All concrete subclasses of AlgorithmicEnv
ALL_ENVS = [
alg.copy_.CopyEnv,
alg.duplicated_input.DuplicatedInputEnv,
alg.repeat_copy.RepeatCopyEnv,
alg.reverse.ReverseEnv,
alg.reversed_addition.ReversedAdditionEnv,
]
ALL_TAPE_ENVS = [env for env in ALL_ENVS
if issubclass(env, alg.algorithmic_env.TapeAlgorithmicEnv)]
ALL_GRID_ENVS = [env for env in ALL_ENVS
if issubclass(env, alg.algorithmic_env.GridAlgorithmicEnv)]
def imprint(env, input_arr):
"""Monkey-patch the given environment so that when reset() is called, the
input tape/grid will be set to the given data, rather than being randomly
generated."""
env.generate_input_data = lambda _: input_arr
class TestAlgorithmicEnvInteractions(unittest.TestCase):
"""Test some generic behaviour not specific to any particular algorithmic
environment. Movement, allocation of rewards, etc."""
CANNED_INPUT = [0, 1]
ENV_KLS = alg.copy_.CopyEnv
LEFT, RIGHT = ENV_KLS._movement_idx('left'), ENV_KLS._movement_idx('right')
def setUp(self):
self.env = self.ENV_KLS(base=2, chars=True)
imprint(self.env, self.CANNED_INPUT)
def test_successful_interaction(self):
obs = self.env.reset()
self.assertEqual(obs, 0)
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
self.assertEqual(obs, 1)
self.assertGreater(reward, 0)
self.assertFalse(done)
obs, reward, done, _ = self.env.step([self.LEFT, 1, 1])
self.assertTrue(done)
self.assertGreater(reward, 0)
def test_bad_output_fail_fast(self):
obs = self.env.reset()
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
self.assertTrue(done)
self.assertLess(reward, 0)
def test_levelup(self):
obs = self.env.reset()
# Kind of a hack
alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls = []
min_length = self.env.min_length
for i in range(self.env.last):
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 0])
self.assertFalse(done)
obs, reward, done, _ = self.env.step([self.RIGHT, 1, 1])
self.assertTrue(done)
self.env.reset()
if i < self.env.last-1:
self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), i+1)
else:
# Should have leveled up on the last iteration
self.assertEqual(self.env.min_length, min_length+1)
self.assertEqual(len(alg.algorithmic_env.AlgorithmicEnv.reward_shortfalls), 0)
def test_walk_off_the_end(self):
obs = self.env.reset()
# Walk off the end
obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
self.assertEqual(obs, self.env.base)
self.assertEqual(r, 0)
self.assertFalse(done)
# Walk further off track
obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
self.assertEqual(obs, self.env.base)
self.assertFalse(done)
# Return to the first input character
obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
self.assertEqual(obs, self.env.base)
self.assertFalse(done)
obs, r, done, _ = self.env.step([self.RIGHT, 0, 0])
self.assertEqual(obs, 0)
    def test_grid_navigation(self):
env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=6)
N,S,E,W = [env._movement_idx(named_dir) for named_dir in ['up', 'down', 'right', 'left']]
# Corresponds to a grid that looks like...
# 0 1 2
# 3 4 5
canned = [ [0, 3], [1, 4], [2, 5] ]
imprint(env, canned)
obs = env.reset()
self.assertEqual(obs, 0)
navigation = [
(S, 3), (N, 0), (E, 1), (S, 4), (S, 6), (E, 6), (N, 5), (N, 2), (W, 1)
]
for (movement, expected_obs) in navigation:
obs, reward, done, _ = env.step([movement, 0, 0])
self.assertEqual(reward, 0)
self.assertFalse(done)
self.assertEqual(obs, expected_obs)
def test_grid_success(self):
env = alg.reversed_addition.ReversedAdditionEnv(rows=2, base=3)
canned = [ [1, 2], [1, 0], [2, 2] ]
imprint(env, canned)
obs = env.reset()
target = [0, 2, 1, 1]
self.assertEqual(env.target, target)
self.assertEqual(obs, 1)
for i, target_digit in enumerate(target):
obs, reward, done, _ = env.step([0, 1, target_digit])
self.assertGreater(reward, 0)
self.assertEqual(done, i==len(target)-1)
def test_sane_time_limit(self):
obs = self.env.reset()
self.assertLess(self.env.time_limit, 100)
for _ in range(100):
obs, r, done, _ = self.env.step([self.LEFT, 0, 0])
if done:
return
self.fail("Time limit wasn't enforced")
def test_rendering(self):
env = self.env
obs = env.reset()
self.assertEqual(env._get_str_obs(), 'A')
self.assertEqual(env._get_str_obs(1), 'B')
self.assertEqual(env._get_str_obs(-1), ' ')
self.assertEqual(env._get_str_obs(2), ' ')
self.assertEqual(env._get_str_target(0), 'A')
self.assertEqual(env._get_str_target(1), 'B')
# Test numerical alphabet rendering
env = self.ENV_KLS(base=3, chars=False)
imprint(env, self.CANNED_INPUT)
env.reset()
self.assertEqual(env._get_str_obs(), '0')
self.assertEqual(env._get_str_obs(1), '1')
class TestTargets(unittest.TestCase):
"""Test the rules mapping input strings/grids to target outputs."""
def test_reverse_target(self):
input_expected = [
([0], [0]),
([0, 1], [1, 0]),
([1, 1], [1, 1]),
([1, 0, 1], [1, 0, 1]),
([0, 0, 1, 1], [1, 1, 0, 0]),
]
env = alg.reverse.ReverseEnv()
for input_arr, expected in input_expected:
target = env.target_from_input_data(input_arr)
self.assertEqual(target, expected)
def test_reversed_addition_target(self):
env = alg.reversed_addition.ReversedAdditionEnv(base=3)
input_expected = [
([[1,1], [1,1]], [2, 2]),
([[2,2], [0,1]], [1, 2]),
([[2,1], [1,1], [1,1], [1,0]], [0, 0, 0, 2]),
]
for (input_grid, expected_target) in input_expected:
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
def test_reversed_addition_3rows(self):
env = alg.reversed_addition.ReversedAdditionEnv(base=3, rows=3)
input_expected = [
([[1,1,0],[0,1,1]], [2, 2]),
([[1,1,2],[0,1,1]], [1,0,1]),
]
for (input_grid, expected_target) in input_expected:
self.assertEqual(env.target_from_input_data(input_grid), expected_target)
def test_copy_target(self):
env = alg.copy_.CopyEnv()
self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2])
def test_duplicated_input_target(self):
env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
self.assertEqual(env.target_from_input_data([0, 0, 0, 0, 1, 1]), [0, 0, 1])
def test_repeat_copy_target(self):
env = alg.repeat_copy.RepeatCopyEnv()
self.assertEqual(env.target_from_input_data([0, 1, 2]), [0, 1, 2, 2, 1, 0, 0, 1, 2])
class TestInputGeneration(unittest.TestCase):
"""Test random input generation.
"""
def test_tape_inputs(self):
for env_kls in ALL_TAPE_ENVS:
env = env_kls()
for size in range(2,5):
input_tape = env.generate_input_data(size)
self.assertTrue(all(0<=x<=env.base for x in input_tape),
"Invalid input tape from env {}: {}".format(env_kls, input_tape))
# DuplicatedInput needs to generate inputs with even length,
# so it may be short one
self.assertLessEqual(len(input_tape), size)
def test_grid_inputs(self):
for env_kls in ALL_GRID_ENVS:
env = env_kls()
for size in range(2, 5):
input_grid = env.generate_input_data(size)
# Should get "size" sublists, each of length self.rows (not the
# opposite, as you might expect)
self.assertEqual(len(input_grid), size)
self.assertTrue(all(len(col) == env.rows for col in input_grid))
self.assertTrue(all(0<=x<=env.base for x in input_grid[0]))
def test_duplicatedinput_inputs(self):
"""The duplicated_input env needs to generate strings with the appropriate
amount of repetiion."""
env = alg.duplicated_input.DuplicatedInputEnv(duplication=2)
input_tape = env.generate_input_data(4)
self.assertEqual(len(input_tape), 4)
self.assertEqual(input_tape[0], input_tape[1])
self.assertEqual(input_tape[2], input_tape[3])
# If requested input size isn't a multiple of duplication, go lower
input_tape = env.generate_input_data(3)
self.assertEqual(len(input_tape), 2)
self.assertEqual(input_tape[0], input_tape[1])
# If requested input size is *less than* duplication, go up
input_tape = env.generate_input_data(1)
self.assertEqual(len(input_tape), 2)
self.assertEqual(input_tape[0], input_tape[1])
env = alg.duplicated_input.DuplicatedInputEnv(duplication=3)
input_tape = env.generate_input_data(6)
self.assertEqual(len(input_tape), 6)
self.assertEqual(input_tape[0], input_tape[1])
self.assertEqual(input_tape[1], input_tape[2])
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1 @@
from gym.envs.atari.atari_env import AtariEnv

View File

@ -0,0 +1,194 @@
import numpy as np
import os
import gym
from gym import error, spaces
from gym import utils
from gym.utils import seeding
try:
import atari_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)".format(e))
def to_ram(ale):
ram_size = ale.getRAMSize()
ram = np.zeros((ram_size),dtype=np.uint8)
ale.getRAM(ram)
return ram
class AtariEnv(gym.Env, utils.EzPickle):
metadata = {'render.modes': ['human', 'rgb_array']}
def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5),
repeat_action_probability=0., full_action_space=False):
"""Frameskip should be either a tuple (indicating a random range to
choose from, with the top value exclude), or an int."""
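        # e.g. frameskip=4 applies every action for exactly 4 frames, while the
        # default frameskip=(2, 5) samples the skip uniformly from {2, 3, 4} at
        # each step (the upper bound is excluded).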
utils.EzPickle.__init__(self, game, obs_type, frameskip, repeat_action_probability)
assert obs_type in ('ram', 'image')
self.game_path = atari_py.get_game_path(game)
if not os.path.exists(self.game_path):
raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
self._obs_type = obs_type
self.frameskip = frameskip
self.ale = atari_py.ALEInterface()
self.viewer = None
# Tune (or disable) ALE's action repeat:
# https://github.com/openai/gym/issues/349
assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)
self.seed()
self._action_set = (self.ale.getLegalActionSet() if full_action_space
else self.ale.getMinimalActionSet())
self.action_space = spaces.Discrete(len(self._action_set))
(screen_width,screen_height) = self.ale.getScreenDims()
if self._obs_type == 'ram':
self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
elif self._obs_type == 'image':
self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
else:
raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
def seed(self, seed=None):
self.np_random, seed1 = seeding.np_random(seed)
# Derive a random seed. This gets passed as a uint, but gets
# checked as an int elsewhere, so we need to keep it below
# 2**31.
seed2 = seeding.hash_seed(seed1 + 1) % 2**31
# Empirically, we need to seed before loading the ROM.
self.ale.setInt(b'random_seed', seed2)
self.ale.loadROM(self.game_path)
return [seed1, seed2]
def step(self, a):
reward = 0.0
action = self._action_set[a]
if isinstance(self.frameskip, int):
num_steps = self.frameskip
else:
num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
for _ in range(num_steps):
reward += self.ale.act(action)
ob = self._get_obs()
return ob, reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}
def _get_image(self):
return self.ale.getScreenRGB2()
def _get_ram(self):
return to_ram(self.ale)
@property
def _n_actions(self):
return len(self._action_set)
def _get_obs(self):
if self._obs_type == 'ram':
return self._get_ram()
elif self._obs_type == 'image':
img = self._get_image()
return img
# return: (states, observations)
def reset(self):
self.ale.reset_game()
return self._get_obs()
def render(self, mode='human'):
img = self._get_image()
if mode == 'rgb_array':
return img
elif mode == 'human':
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.SimpleImageViewer()
self.viewer.imshow(img)
return self.viewer.isopen
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
def get_action_meanings(self):
return [ACTION_MEANING[i] for i in self._action_set]
def get_keys_to_action(self):
KEYWORD_TO_KEY = {
'UP': ord('w'),
'DOWN': ord('s'),
'LEFT': ord('a'),
'RIGHT': ord('d'),
'FIRE': ord(' '),
}
keys_to_action = {}
for action_id, action_meaning in enumerate(self.get_action_meanings()):
keys = []
for keyword, key in KEYWORD_TO_KEY.items():
if keyword in action_meaning:
keys.append(key)
keys = tuple(sorted(keys))
assert keys not in keys_to_action
keys_to_action[keys] = action_id
return keys_to_action
def clone_state(self):
"""Clone emulator state w/o system state. Restoring this state will
*not* give an identical environment. For complete cloning and restoring
of the full state, see `{clone,restore}_full_state()`."""
state_ref = self.ale.cloneState()
state = self.ale.encodeState(state_ref)
self.ale.deleteState(state_ref)
return state
def restore_state(self, state):
"""Restore emulator state w/o system state."""
state_ref = self.ale.decodeState(state)
self.ale.restoreState(state_ref)
self.ale.deleteState(state_ref)
def clone_full_state(self):
"""Clone emulator state w/ system state including pseudorandomness.
Restoring this state will give an identical environment."""
state_ref = self.ale.cloneSystemState()
state = self.ale.encodeState(state_ref)
self.ale.deleteState(state_ref)
return state
def restore_full_state(self, state):
"""Restore emulator state w/ system state including pseudorandomness."""
state_ref = self.ale.decodeState(state)
self.ale.restoreSystemState(state_ref)
self.ale.deleteState(state_ref)
ACTION_MEANING = {
0 : "NOOP",
1 : "FIRE",
2 : "UP",
3 : "RIGHT",
4 : "LEFT",
5 : "DOWN",
6 : "UPRIGHT",
7 : "UPLEFT",
8 : "DOWNRIGHT",
9 : "DOWNLEFT",
10 : "UPFIRE",
11 : "RIGHTFIRE",
12 : "LEFTFIRE",
13 : "DOWNFIRE",
14 : "UPRIGHTFIRE",
15 : "UPLEFTFIRE",
16 : "DOWNRIGHTFIRE",
17 : "DOWNLEFTFIRE",
}

View File

@ -0,0 +1,4 @@
from gym.envs.box2d.lunar_lander import LunarLander
from gym.envs.box2d.lunar_lander import LunarLanderContinuous
from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
from gym.envs.box2d.car_racing import CarRacing

View File

@ -0,0 +1,582 @@
import sys
import math
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
import gym
from gym import spaces
from gym.utils import colorize, seeding, EzPickle
# This is a simple 4-joint walker robot environment.
#
# There are two versions:
#
# - Normal, with slightly uneven terrain.
#
# - Hardcore with ladders, stumps, pitfalls.
#
# Reward is given for moving forward, totalling 300+ points up to the far end. If the robot falls,
# it gets -100. Applying motor torque costs a small amount of points; a more optimal agent
# will get a better score.
#
# A heuristic is provided for testing; it's also useful for getting demonstrations to
# learn from. To run the heuristic:
#
# python gym/envs/box2d/bipedal_walker.py
#
# State consists of hull angle speed, angular velocity, horizontal speed, vertical speed,
# position of joints and joints angular speed, legs contact with ground, and 10 lidar
# rangefinder measurements to help deal with the hardcore version. There are no coordinates
# in the state vector. Lidar is less useful in the normal version, but it works.
#
# To solve the game you need to get 300 points in 1600 time steps.
#
# To solve hardcore version you need 300 points in 2000 time steps.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
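#
# Example (sketch): a random-action rollout, analogous to the scripted
# heuristic at the bottom of this file:
#
#     env = BipedalWalker()
#     obs = env.reset()
#     done = False
#     while not done:
#         obs, reward, done, info = env.step(env.action_space.sample())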
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MOTORS_TORQUE = 80
SPEED_HIP = 4
SPEED_KNEE = 6
LIDAR_RANGE = 160/SCALE
INITIAL_RANDOM = 5
HULL_POLY =[
(-30,+9), (+6,+9), (+34,+1),
(+34,-8), (-30,-8)
]
LEG_DOWN = -8/SCALE
LEG_W, LEG_H = 8/SCALE, 34/SCALE
VIEWPORT_W = 600
VIEWPORT_H = 400
TERRAIN_STEP = 14/SCALE
TERRAIN_LENGTH = 200 # in steps
TERRAIN_HEIGHT = VIEWPORT_H/SCALE/4
TERRAIN_GRASS = 10 # how long grass spots are, in steps
TERRAIN_STARTPAD = 20 # in steps
FRICTION = 2.5
HULL_FD = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in HULL_POLY ]),
density=5.0,
friction=0.1,
categoryBits=0x0020,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
LEG_FD = fixtureDef(
shape=polygonShape(box=(LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
LOWER_FD = fixtureDef(
shape=polygonShape(box=(0.8*LEG_W/2, LEG_H/2)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.hull==contact.fixtureA.body or self.env.hull==contact.fixtureB.body:
self.env.game_over = True
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = True
def EndContact(self, contact):
for leg in [self.env.legs[1], self.env.legs[3]]:
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
leg.ground_contact = False
class BipedalWalker(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
hardcore = False
def __init__(self):
EzPickle.__init__(self)
self.seed()
self.viewer = None
self.world = Box2D.b2World()
self.terrain = None
self.hull = None
self.prev_shaping = None
self.fd_polygon = fixtureDef(
shape = polygonShape(vertices=
[(0, 0),
(1, 0),
(1, -1),
(0, -1)]),
friction = FRICTION)
self.fd_edge = fixtureDef(
shape = edgeShape(vertices=
[(0, 0),
(1, 1)]),
friction = FRICTION,
categoryBits=0x0001,
)
self.reset()
high = np.array([np.inf] * 24)
self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _destroy(self):
if not self.terrain: return
self.world.contactListener = None
for t in self.terrain:
self.world.DestroyBody(t)
self.terrain = []
self.world.DestroyBody(self.hull)
self.hull = None
for leg in self.legs:
self.world.DestroyBody(leg)
self.legs = []
self.joints = []
def _generate_terrain(self, hardcore):
GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
state = GRASS
velocity = 0.0
y = TERRAIN_HEIGHT
counter = TERRAIN_STARTPAD
oneshot = False
self.terrain = []
self.terrain_x = []
self.terrain_y = []
for i in range(TERRAIN_LENGTH):
x = i*TERRAIN_STEP
self.terrain_x.append(x)
if state==GRASS and not oneshot:
velocity = 0.8*velocity + 0.01*np.sign(TERRAIN_HEIGHT - y)
if i > TERRAIN_STARTPAD: velocity += self.np_random.uniform(-1, 1)/SCALE #1
y += velocity
elif state==PIT and oneshot:
counter = self.np_random.randint(3, 5)
poly = [
(x, y),
(x+TERRAIN_STEP, y),
(x+TERRAIN_STEP, y-4*TERRAIN_STEP),
(x, y-4*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
self.fd_polygon.shape.vertices=[(p[0]+TERRAIN_STEP*counter,p[1]) for p in poly]
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
counter += 2
original_y = y
elif state==PIT and not oneshot:
y = original_y
if counter > 1:
y -= 4*TERRAIN_STEP
elif state==STUMP and oneshot:
counter = self.np_random.randint(1, 3)
poly = [
(x, y),
(x+counter*TERRAIN_STEP, y),
(x+counter*TERRAIN_STEP, y+counter*TERRAIN_STEP),
(x, y+counter*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
elif state==STAIRS and oneshot:
stair_height = +1 if self.np_random.rand() > 0.5 else -1
stair_width = self.np_random.randint(4, 5)
stair_steps = self.np_random.randint(3, 5)
original_y = y
for s in range(stair_steps):
poly = [
(x+( s*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+( s*stair_height)*TERRAIN_STEP),
(x+((1+s)*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
(x+( s*stair_width)*TERRAIN_STEP, y+(-1+s*stair_height)*TERRAIN_STEP),
]
self.fd_polygon.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_polygon)
t.color1, t.color2 = (1,1,1), (0.6,0.6,0.6)
self.terrain.append(t)
counter = stair_steps*stair_width
elif state==STAIRS and not oneshot:
s = stair_steps*stair_width - counter - stair_height
n = s/stair_width
y = original_y + (n*stair_height)*TERRAIN_STEP
oneshot = False
self.terrain_y.append(y)
counter -= 1
if counter==0:
counter = self.np_random.randint(TERRAIN_GRASS/2, TERRAIN_GRASS)
if state==GRASS and hardcore:
state = self.np_random.randint(1, _STATES_)
oneshot = True
else:
state = GRASS
oneshot = True
self.terrain_poly = []
for i in range(TERRAIN_LENGTH-1):
poly = [
(self.terrain_x[i], self.terrain_y[i]),
(self.terrain_x[i+1], self.terrain_y[i+1])
]
self.fd_edge.shape.vertices=poly
t = self.world.CreateStaticBody(
fixtures = self.fd_edge)
color = (0.3, 1.0 if i%2==0 else 0.8, 0.3)
t.color1 = color
t.color2 = color
self.terrain.append(t)
color = (0.4, 0.6, 0.3)
poly += [ (poly[1][0], 0), (poly[0][0], 0) ]
self.terrain_poly.append( (poly, color) )
self.terrain.reverse()
def _generate_clouds(self):
# Sorry for the clouds, couldn't resist
self.cloud_poly = []
for i in range(TERRAIN_LENGTH//20):
x = self.np_random.uniform(0, TERRAIN_LENGTH)*TERRAIN_STEP
y = VIEWPORT_H/SCALE*3/4
poly = [
(x+15*TERRAIN_STEP*math.sin(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP),
y+ 5*TERRAIN_STEP*math.cos(3.14*2*a/5)+self.np_random.uniform(0,5*TERRAIN_STEP) )
for a in range(5) ]
x1 = min( [p[0] for p in poly] )
x2 = max( [p[0] for p in poly] )
self.cloud_poly.append( (poly,x1,x2) )
def reset(self):
self._destroy()
self.world.contactListener_bug_workaround = ContactDetector(self)
self.world.contactListener = self.world.contactListener_bug_workaround
self.game_over = False
self.prev_shaping = None
self.scroll = 0.0
self.lidar_render = 0
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
self._generate_terrain(self.hardcore)
self._generate_clouds()
init_x = TERRAIN_STEP*TERRAIN_STARTPAD/2
init_y = TERRAIN_HEIGHT+2*LEG_H
self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y),
fixtures = HULL_FD
)
self.hull.color1 = (0.5,0.4,0.9)
self.hull.color2 = (0.3,0.3,0.5)
self.hull.ApplyForceToCenter((self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)
self.legs = []
self.joints = []
for i in [-1,+1]:
leg = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = LEG_FD
)
leg.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
leg.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=leg,
localAnchorA=(0, LEG_DOWN),
localAnchorB=(0, LEG_H/2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = i,
lowerAngle = -0.8,
upperAngle = 1.1,
)
self.legs.append(leg)
self.joints.append(self.world.CreateJoint(rjd))
lower = self.world.CreateDynamicBody(
position = (init_x, init_y - LEG_H*3/2 - LEG_DOWN),
angle = (i*0.05),
fixtures = LOWER_FD
)
lower.color1 = (0.6-i/10., 0.3-i/10., 0.5-i/10.)
lower.color2 = (0.4-i/10., 0.2-i/10., 0.3-i/10.)
rjd = revoluteJointDef(
bodyA=leg,
bodyB=lower,
localAnchorA=(0, -LEG_H/2),
localAnchorB=(0, LEG_H/2),
enableMotor=True,
enableLimit=True,
maxMotorTorque=MOTORS_TORQUE,
motorSpeed = 1,
lowerAngle = -1.6,
upperAngle = -0.1,
)
lower.ground_contact = False
self.legs.append(lower)
self.joints.append(self.world.CreateJoint(rjd))
self.drawlist = self.terrain + self.legs + [self.hull]
class LidarCallback(Box2D.b2.rayCastCallback):
def ReportFixture(self, fixture, point, normal, fraction):
if (fixture.filterData.categoryBits & 1) == 0:
return 1
self.p2 = point
self.fraction = fraction
return 0
self.lidar = [LidarCallback() for _ in range(10)]
return self.step(np.array([0,0,0,0]))[0]
def step(self, action):
#self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
control_speed = False # Should be easier as well
if control_speed:
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
else:
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
self.joints[0].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
self.joints[1].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
self.joints[2].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
self.joints[3].maxMotorTorque = float(MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1))
self.world.Step(1.0/FPS, 6*30, 2*30)
pos = self.hull.position
vel = self.hull.linearVelocity
for i in range(10):
self.lidar[i].fraction = 1.0
self.lidar[i].p1 = pos
self.lidar[i].p2 = (
pos[0] + math.sin(1.5*i/10.0)*LIDAR_RANGE,
pos[1] - math.cos(1.5*i/10.0)*LIDAR_RANGE)
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
state = [
            self.hull.angle,        # Normal angles up to 0.5 here, but more is surely possible.
2.0*self.hull.angularVelocity/FPS,
0.3*vel.x*(VIEWPORT_W/SCALE)/FPS, # Normalized to get -1..1 range
0.3*vel.y*(VIEWPORT_H/SCALE)/FPS,
            self.joints[0].angle,   # This will give 1.1 on high up, but it's still OK (and there should be spikes on hitting the ground, that's normal too)
self.joints[0].speed / SPEED_HIP,
self.joints[1].angle + 1.0,
self.joints[1].speed / SPEED_KNEE,
1.0 if self.legs[1].ground_contact else 0.0,
self.joints[2].angle,
self.joints[2].speed / SPEED_HIP,
self.joints[3].angle + 1.0,
self.joints[3].speed / SPEED_KNEE,
1.0 if self.legs[3].ground_contact else 0.0
]
state += [l.fraction for l in self.lidar]
assert len(state)==24
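# State layout: [0] hull angle, [1] hull angular velocity, [2]-[3] x/y velocity,
# [4]-[8] leg 1 (hip angle, hip speed, knee angle, knee speed, ground contact),
# [9]-[13] leg 2 (same five), [14]-[23] the 10 lidar fractions.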
self.scroll = pos.x - VIEWPORT_W/SCALE/5
shaping = 130*pos[0]/SCALE # moving forward is a way to receive reward (normalized to get 300 on completion)
shaping -= 5.0*abs(state[0]) # keep head straight, other than that and falling, any behavior is unpunished
reward = 0
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
for a in action:
reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
# normalized to about -50.0 using the heuristic; a more optimal agent should spend less
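# e.g. assuming MOTORS_TORQUE = 80 as defined earlier in this file, full torque on
# all four joints costs 4 * 0.00035 * 80 = 0.112 reward per step.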
done = False
if self.game_over or pos[0] < 0:
reward = -100
done = True
if pos[0] > (TERRAIN_LENGTH-TERRAIN_GRASS)*TERRAIN_STEP:
done = True
return np.array(state), reward, done, {}
def render(self, mode='human'):
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(self.scroll, VIEWPORT_W/SCALE + self.scroll, 0, VIEWPORT_H/SCALE)
self.viewer.draw_polygon( [
(self.scroll, 0),
(self.scroll+VIEWPORT_W/SCALE, 0),
(self.scroll+VIEWPORT_W/SCALE, VIEWPORT_H/SCALE),
(self.scroll, VIEWPORT_H/SCALE),
], color=(0.9, 0.9, 1.0) )
for poly,x1,x2 in self.cloud_poly:
if x2 < self.scroll/2: continue
if x1 > self.scroll/2 + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon( [(p[0]+self.scroll/2, p[1]) for p in poly], color=(1,1,1))
for poly, color in self.terrain_poly:
if poly[1][0] < self.scroll: continue
if poly[0][0] > self.scroll + VIEWPORT_W/SCALE: continue
self.viewer.draw_polygon(poly, color=color)
self.lidar_render = (self.lidar_render+1) % 100
i = self.lidar_render
if i < 2*len(self.lidar):
l = self.lidar[i] if i < len(self.lidar) else self.lidar[len(self.lidar)-i-1]
self.viewer.draw_polyline( [l.p1, l.p2], color=(1,0,0), linewidth=1 )
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
flagy1 = TERRAIN_HEIGHT
flagy2 = flagy1 + 50/SCALE
x = TERRAIN_STEP*3
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(0,0,0), linewidth=2 )
f = [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)]
self.viewer.draw_polygon(f, color=(0.9,0.2,0) )
self.viewer.draw_polyline(f + [f[0]], color=(0,0,0), linewidth=2 )
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
class BipedalWalkerHardcore(BipedalWalker):
hardcore = True
if __name__=="__main__":
# Heuristic: suboptimal, has no notion of balance.
env = BipedalWalker()
env.reset()
steps = 0
total_reward = 0
a = np.array([0.0, 0.0, 0.0, 0.0])
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1,2,3
SPEED = 0.29 # Will fall forward at higher speeds
state = STAY_ON_ONE_LEG
moving_leg = 0
supporting_leg = 1 - moving_leg
SUPPORT_KNEE_ANGLE = +0.1
supporting_knee_angle = SUPPORT_KNEE_ANGLE
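# The heuristic is a 3-state gait cycle: swing one leg forward while standing on the
# other (STAY_ON_ONE_LEG), lower the swung leg until it touches ground (PUT_OTHER_DOWN),
# then extend the old support knee to push off (PUSH_OFF) and swap leg roles.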
while True:
s, r, done, info = env.step(a)
total_reward += r
if steps % 20 == 0 or done:
print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4] ]))
print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9] ]))
print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]]))
steps += 1
contact0 = s[8]
contact1 = s[13]
moving_s_base = 4 + 5*moving_leg
supporting_s_base = 4 + 5*supporting_leg
hip_targ = [None,None] # -0.8 .. +1.1
knee_targ = [None,None] # -0.6 .. +0.9
hip_todo = [0.0, 0.0]
knee_todo = [0.0, 0.0]
if state==STAY_ON_ONE_LEG:
hip_targ[moving_leg] = 1.1
knee_targ[moving_leg] = -0.6
supporting_knee_angle += 0.03
if s[2] > SPEED: supporting_knee_angle += 0.03
supporting_knee_angle = min( supporting_knee_angle, SUPPORT_KNEE_ANGLE )
knee_targ[supporting_leg] = supporting_knee_angle
if s[supporting_s_base+0] < 0.10: # supporting leg is behind
state = PUT_OTHER_DOWN
if state==PUT_OTHER_DOWN:
hip_targ[moving_leg] = +0.1
knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
knee_targ[supporting_leg] = supporting_knee_angle
if s[moving_s_base+4]:
state = PUSH_OFF
supporting_knee_angle = min( s[moving_s_base+2], SUPPORT_KNEE_ANGLE )
if state==PUSH_OFF:
knee_targ[moving_leg] = supporting_knee_angle
knee_targ[supporting_leg] = +1.0
if s[supporting_s_base+2] > 0.88 or s[2] > 1.2*SPEED:
state = STAY_ON_ONE_LEG
moving_leg = 1 - moving_leg
supporting_leg = 1 - moving_leg
if hip_targ[0]: hip_todo[0] = 0.9*(hip_targ[0] - s[4]) - 0.25*s[5]
if hip_targ[1]: hip_todo[1] = 0.9*(hip_targ[1] - s[9]) - 0.25*s[10]
if knee_targ[0]: knee_todo[0] = 4.0*(knee_targ[0] - s[6]) - 0.25*s[7]
if knee_targ[1]: knee_todo[1] = 4.0*(knee_targ[1] - s[11]) - 0.25*s[12]
hip_todo[0] -= 0.9*(0-s[0]) - 1.5*s[1] # PID to keep head straight
hip_todo[1] -= 0.9*(0-s[0]) - 1.5*s[1]
knee_todo[0] -= 15.0*s[3] # vertical speed, to damp oscillations
knee_todo[1] -= 15.0*s[3]
a[0] = hip_todo[0]
a[1] = knee_todo[0]
a[2] = hip_todo[1]
a[3] = knee_todo[1]
a = np.clip(0.5*a, -1.0, 1.0)
env.render()
if done: break

View File

@ -0,0 +1,244 @@
import numpy as np
import math
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
# Top-down car dynamics simulation.
#
# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
# This simulation is a bit more detailed, with wheels rotation.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
SIZE = 0.02
ENGINE_POWER = 100000000*SIZE*SIZE
WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
FRICTION_LIMIT = 1000000*SIZE*SIZE # friction ~= mass ~= size^2 (calculated implicitly using density)
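# With SIZE = 0.02 these come out to ENGINE_POWER = 40000, WHEEL_MOMENT_OF_INERTIA = 1.6
# and FRICTION_LIMIT = 400 in Box2D units.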
WHEEL_R = 27
WHEEL_W = 14
WHEELPOS = [
(-55,+80), (+55,+80),
(-55,-82), (+55,-82)
]
HULL_POLY1 =[
(-60,+130), (+60,+130),
(+60,+110), (-60,+110)
]
HULL_POLY2 =[
(-15,+120), (+15,+120),
(+20, +20), (-20, 20)
]
HULL_POLY3 =[
(+25, +20),
(+50, -10),
(+50, -40),
(+20, -90),
(-20, -90),
(-50, -40),
(-50, -10),
(-25, +20)
]
HULL_POLY4 =[
(-50,-120), (+50,-120),
(+50,-90), (-50,-90)
]
WHEEL_COLOR = (0.0,0.0,0.0)
WHEEL_WHITE = (0.3,0.3,0.3)
MUD_COLOR = (0.4,0.4,0.0)
class Car:
def __init__(self, world, init_angle, init_x, init_y):
self.world = world
self.hull = self.world.CreateDynamicBody(
position = (init_x, init_y),
angle = init_angle,
fixtures = [
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0),
fixtureDef(shape = polygonShape(vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0)
]
)
self.hull.color = (0.8,0.0,0.0)
self.wheels = []
self.fuel_spent = 0.0
WHEEL_POLY = [
(-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R),
(+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R)
]
for wx,wy in WHEELPOS:
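# front_k evaluates to 1.0 in both branches; the conditional is a leftover hook for
# scaling front wheels differently from rear ones.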
front_k = 1.0 if wy > 0 else 1.0
w = self.world.CreateDynamicBody(
position = (init_x+wx*SIZE, init_y+wy*SIZE),
angle = init_angle,
fixtures = fixtureDef(
shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]),
density=0.1,
categoryBits=0x0020,
maskBits=0x001,
restitution=0.0)
)
w.wheel_rad = front_k*WHEEL_R*SIZE
w.color = WHEEL_COLOR
w.gas = 0.0
w.brake = 0.0
w.steer = 0.0
w.phase = 0.0 # wheel angle
w.omega = 0.0 # angular velocity
w.skid_start = None
w.skid_particle = None
rjd = revoluteJointDef(
bodyA=self.hull,
bodyB=w,
localAnchorA=(wx*SIZE,wy*SIZE),
localAnchorB=(0,0),
enableMotor=True,
enableLimit=True,
maxMotorTorque=180*900*SIZE*SIZE,
motorSpeed = 0,
lowerAngle = -0.4,
upperAngle = +0.4,
)
w.joint = self.world.CreateJoint(rjd)
w.tiles = set()
w.userData = w
self.wheels.append(w)
self.drawlist = self.wheels + [self.hull]
self.particles = []
def gas(self, gas):
'control: rear wheel drive'
gas = np.clip(gas, 0, 1)
for w in self.wheels[2:4]:
diff = gas - w.gas
if diff > 0.1: diff = 0.1 # gradually increase, but stop immediately
w.gas += diff
def brake(self, b):
'control: brake b=0..1, more than 0.9 blocks the wheels (zero rotation)'
for w in self.wheels:
w.brake = b
def steer(self, s):
'control: steer s=-1..1, it takes time to rotate the steering wheel from side to side; s is the target position'
self.wheels[0].steer = s
self.wheels[1].steer = s
def step(self, dt):
for w in self.wheels:
# Steer each wheel
dir = np.sign(w.steer - w.joint.angle)
val = abs(w.steer - w.joint.angle)
w.joint.motorSpeed = dir*min(50.0*val, 3.0)
# Position => friction_limit
grass = True
friction_limit = FRICTION_LIMIT*0.6 # Grass friction if no tile
for tile in w.tiles:
friction_limit = max(friction_limit, FRICTION_LIMIT*tile.road_friction)
grass = False
# Force
forw = w.GetWorldVector( (0,1) )
side = w.GetWorldVector( (1,0) )
v = w.linearVelocity
vf = forw[0]*v[0] + forw[1]*v[1] # forward speed
vs = side[0]*v[0] + side[1]*v[1] # side speed
# WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
# WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
# domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0) # small coef not to divide by zero
self.fuel_spent += dt*ENGINE_POWER*w.gas
if w.brake >= 0.9:
w.omega = 0
elif w.brake > 0:
BRAKE_FORCE = 15 # radians per second
dir = -np.sign(w.omega)
val = BRAKE_FORCE*w.brake
if abs(val) > abs(w.omega): val = abs(w.omega) # low speed => same as = 0
w.omega += dir*val
w.phase += w.omega*dt
vr = w.omega*w.wheel_rad # rotating wheel speed
f_force = -vf + vr # force direction is direction of speed difference
p_force = -vs
# Physically correct is to always apply friction_limit until speed is equal.
# But dt is finite, that will lead to oscillations if difference is already near zero.
f_force *= 205000*SIZE*SIZE # Empirical coefficient to damp oscillations within a few steps (has no effect on friction_limit)
p_force *= 205000*SIZE*SIZE
force = np.sqrt(np.square(f_force) + np.square(p_force))
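# force is the magnitude of the requested friction. If it exceeds the limit it is
# rescaled below, keeping the combined forward/side force inside the friction circle.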
# Skid trace
if abs(force) > 2.0*friction_limit:
if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30:
w.skid_particle.poly.append( (w.position[0], w.position[1]) )
elif w.skid_start is None:
w.skid_start = w.position
else:
w.skid_particle = self._create_particle( w.skid_start, w.position, grass )
w.skid_start = None
else:
w.skid_start = None
w.skid_particle = None
if abs(force) > friction_limit:
f_force /= force
p_force /= force
force = friction_limit # Correct physics here
f_force *= force
p_force *= force
w.omega -= dt*f_force*w.wheel_rad/WHEEL_MOMENT_OF_INERTIA
w.ApplyForceToCenter( (
p_force*side[0] + f_force*forw[0],
p_force*side[1] + f_force*forw[1]), True )
def draw(self, viewer, draw_particles=True):
if draw_particles:
for p in self.particles:
viewer.draw_polyline(p.poly, color=p.color, linewidth=5)
for obj in self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
path = [trans*v for v in f.shape.vertices]
viewer.draw_polygon(path, color=obj.color)
if "phase" not in obj.__dict__: continue
a1 = obj.phase
a2 = obj.phase + 1.2 # radians
s1 = math.sin(a1)
s2 = math.sin(a2)
c1 = math.cos(a1)
c2 = math.cos(a2)
if s1>0 and s2>0: continue
if s1>0: c1 = np.sign(c1)
if s2>0: c2 = np.sign(c2)
white_poly = [
(-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
(+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
]
viewer.draw_polygon([trans*v for v in white_poly], color=WHEEL_WHITE)
def _create_particle(self, point1, point2, grass):
class Particle:
pass
p = Particle()
p.color = WHEEL_COLOR if not grass else MUD_COLOR
p.ttl = 1
p.poly = [(point1[0],point1[1]), (point2[0],point2[1])]
p.grass = grass
self.particles.append(p)
while len(self.particles) > 30:
self.particles.pop(0)
return p
def destroy(self):
self.world.DestroyBody(self.hull)
self.hull = None
for w in self.wheels:
self.world.DestroyBody(w)
self.wheels = []

View File

@ -0,0 +1,498 @@
import sys, math
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
import gym
from gym import spaces
from gym.envs.box2d.car_dynamics import Car
from gym.utils import colorize, seeding, EzPickle
import pyglet
from pyglet import gl
# Easiest continuous control task to learn from pixels, a top-down racing environment.
# Discrete control is reasonable in this environment as well; on/off discretisation is
# fine.
#
# State consists of STATE_W x STATE_H pixels.
#
# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
# the total number of tiles in track. For example, if you have finished in 732 frames,
# your reward is 1000 - 0.1*732 = 926.8 points.
#
# Game is solved when agent consistently gets 900+ points. Track is random every episode.
#
# Episode finishes when all tiles are visited. The car can also go outside of PLAYFIELD,
# that is, far off the track; in that case it gets -100 and the episode ends.
#
# Some indicators shown at the bottom of the window and the state RGB buffer. From
# left to right: true speed, four ABS sensors, steering wheel position, gyroscope.
#
# To play yourself (it's rather fast for humans), type:
#
# python gym/envs/box2d/car_racing.py
#
# Remember it's a powerful rear-wheel drive car; don't press the accelerator and turn at the
# same time.
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
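# Worked example of the reward above: finishing a track of N tiles after F frames gives
# 1000*(N/N) - 0.1*F = 1000 - 0.1*F total, independent of N; F = 732 gives 926.8.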
STATE_W = 96 # less than Atari 160x192
STATE_H = 96
VIDEO_W = 600
VIDEO_H = 400
WINDOW_W = 1200
WINDOW_H = 1000
SCALE = 6.0 # Track scale
TRACK_RAD = 900/SCALE # Track is heavily morphed circle with this radius
PLAYFIELD = 2000/SCALE # Game over boundary
FPS = 50
ZOOM = 2.7 # Camera zoom
ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom)
TRACK_DETAIL_STEP = 21/SCALE
TRACK_TURN_RATE = 0.31
TRACK_WIDTH = 40/SCALE
BORDER = 8/SCALE
BORDER_MIN_COUNT = 4
ROAD_COLOR = [0.4, 0.4, 0.4]
class FrictionDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
self._contact(contact, True)
def EndContact(self, contact):
self._contact(contact, False)
def _contact(self, contact, begin):
tile = None
obj = None
u1 = contact.fixtureA.body.userData
u2 = contact.fixtureB.body.userData
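# Exactly one of the two bodies should be a road tile (userData carries road_friction);
# the other is a car wheel (userData carries a tiles set).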
if u1 and "road_friction" in u1.__dict__:
tile = u1
obj = u2
if u2 and "road_friction" in u2.__dict__:
tile = u2
obj = u1
if not tile: return
tile.color[0] = ROAD_COLOR[0]
tile.color[1] = ROAD_COLOR[1]
tile.color[2] = ROAD_COLOR[2]
if not obj or "tiles" not in obj.__dict__: return
if begin:
obj.tiles.add(tile)
#print tile.road_friction, "ADD", len(obj.tiles)
if not tile.road_visited:
tile.road_visited = True
self.env.reward += 1000.0/len(self.env.track)
self.env.tile_visited_count += 1
else:
obj.tiles.remove(tile)
#print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)
class CarRacing(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array', 'state_pixels'],
'video.frames_per_second' : FPS
}
def __init__(self):
EzPickle.__init__(self)
self.seed()
self.contactListener_keepref = FrictionDetector(self)
self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
self.viewer = None
self.invisible_state_window = None
self.invisible_video_window = None
self.road = None
self.car = None
self.reward = 0.0
self.prev_reward = 0.0
self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32) # steer, gas, brake
self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _destroy(self):
if not self.road: return
for t in self.road:
self.world.DestroyBody(t)
self.road = []
self.car.destroy()
def _create_track(self):
CHECKPOINTS = 12
# Create checkpoints
checkpoints = []
for c in range(CHECKPOINTS):
alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
if c==0:
alpha = 0
rad = 1.5*TRACK_RAD
if c==CHECKPOINTS-1:
alpha = 2*math.pi*c/CHECKPOINTS
self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
rad = 1.5*TRACK_RAD
checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )
#print "\n".join(str(h) for h in checkpoints)
#self.road_poly = [ ( # uncomment this to see checkpoints
# [ (tx,ty) for a,tx,ty in checkpoints ],
# (0.7,0.7,0.9) ) ]
self.road = []
# Go from one checkpoint to another to create track
x, y, beta = 1.5*TRACK_RAD, 0, 0
dest_i = 0
laps = 0
track = []
no_freeze = 2500
visited_other_side = False
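# March around the ring of checkpoints: alpha is the polar angle of the current point,
# and a lap is counted each time alpha wraps from negative back past zero.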
while 1:
alpha = math.atan2(y, x)
if visited_other_side and alpha > 0:
laps += 1
visited_other_side = False
if alpha < 0:
visited_other_side = True
alpha += 2*math.pi
while True: # Find destination from checkpoints
failed = True
while True:
dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
if alpha <= dest_alpha:
failed = False
break
dest_i += 1
if dest_i % len(checkpoints) == 0: break
if not failed: break
alpha -= 2*math.pi
continue
r1x = math.cos(beta)
r1y = math.sin(beta)
p1x = -r1y
p1y = r1x
dest_dx = dest_x - x # vector towards destination
dest_dy = dest_y - y
proj = r1x*dest_dx + r1y*dest_dy # destination vector projected on rad
while beta - alpha > 1.5*math.pi: beta -= 2*math.pi
while beta - alpha < -1.5*math.pi: beta += 2*math.pi
prev_beta = beta
proj *= SCALE
if proj > 0.3: beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001*proj))
x += p1x*TRACK_DETAIL_STEP
y += p1y*TRACK_DETAIL_STEP
track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
if laps > 4: break
no_freeze -= 1
if no_freeze==0: break
#print "\n".join([str(t) for t in enumerate(track)])
# Find closed loop range i1..i2, first loop should be ignored, second is OK
i1, i2 = -1, -1
i = len(track)
while True:
i -= 1
if i==0: return False # Failed
pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
if pass_through_start and i2==-1:
i2 = i
elif pass_through_start and i1==-1:
i1 = i
break
print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
assert i1!=-1
assert i2!=-1
track = track[i1:i2-1]
first_beta = track[0][1]
first_perp_x = math.cos(first_beta)
first_perp_y = math.sin(first_beta)
# Length of perpendicular jump to put together head and tail
well_glued_together = np.sqrt(
np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
if well_glued_together > TRACK_DETAIL_STEP:
return False
# Red-white border on hard turns
border = [False]*len(track)
for i in range(len(track)):
good = True
oneside = 0
for neg in range(BORDER_MIN_COUNT):
beta1 = track[i-neg-0][1]
beta2 = track[i-neg-1][1]
good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
oneside += np.sign(beta1 - beta2)
good &= abs(oneside) == BORDER_MIN_COUNT
border[i] = good
for i in range(len(track)):
for neg in range(BORDER_MIN_COUNT):
border[i-neg] |= border[i]
# Create tiles
for i in range(len(track)):
alpha1, beta1, x1, y1 = track[i]
alpha2, beta2, x2, y2 = track[i-1]
road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
t = self.world.CreateStaticBody( fixtures = fixtureDef(
shape=polygonShape(vertices=[road1_l, road1_r, road2_r, road2_l])
))
t.userData = t
c = 0.01*(i%3)
t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
t.road_visited = False
t.road_friction = 1.0
t.fixtures[0].sensor = True
self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
self.road.append(t)
if border[i]:
side = np.sign(beta2 - beta1)
b1_l = (x1 + side* TRACK_WIDTH *math.cos(beta1), y1 + side* TRACK_WIDTH *math.sin(beta1))
b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
b2_l = (x2 + side* TRACK_WIDTH *math.cos(beta2), y2 + side* TRACK_WIDTH *math.sin(beta2))
b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
self.track = track
return True
def reset(self):
self._destroy()
self.reward = 0.0
self.prev_reward = 0.0
self.tile_visited_count = 0
self.t = 0.0
self.road_poly = []
self.human_render = False
while True:
success = self._create_track()
if success: break
print("retry to generate track (normal if there are not many of this messages)")
self.car = Car(self.world, *self.track[0][1:4])
return self.step(None)[0]
def step(self, action):
if action is not None:
self.car.steer(-action[0])
self.car.gas(action[1])
self.car.brake(action[2])
self.car.step(1.0/FPS)
self.world.Step(1.0/FPS, 6*30, 2*30)
self.t += 1.0/FPS
self.state = self.render("state_pixels")
step_reward = 0
done = False
if action is not None: # First step without action, called from reset()
self.reward -= 0.1
# We actually don't want to count fuel spent, we want car to be faster.
#self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
self.car.fuel_spent = 0.0
step_reward = self.reward - self.prev_reward
self.prev_reward = self.reward
if self.tile_visited_count==len(self.track):
done = True
x, y = self.car.hull.position
if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
done = True
step_reward = -100
return self.state, step_reward, done, {}
def render(self, mode='human'):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
self.score_label = pyglet.text.Label('0000', font_size=36,
x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
color=(255,255,255,255))
self.transform = rendering.Transform()
if "t" not in self.__dict__: return # reset() not called yet
zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1) # Animate zoom first second
zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W
zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W
scroll_x = self.car.hull.position[0]
scroll_y = self.car.hull.position[1]
angle = -self.car.hull.angle
vel = self.car.hull.linearVelocity
if np.linalg.norm(vel) > 0.5:
angle = math.atan2(vel[0], vel[1])
self.transform.set_scale(zoom, zoom)
self.transform.set_translation(
WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
self.transform.set_rotation(angle)
self.car.draw(self.viewer, mode!="state_pixels")
arr = None
win = self.viewer.window
if mode != 'state_pixels':
win.switch_to()
win.dispatch_events()
if mode=="rgb_array" or mode=="state_pixels":
win.clear()
t = self.transform
if mode=='rgb_array':
VP_W = VIDEO_W
VP_H = VIDEO_H
else:
VP_W = STATE_W
VP_H = STATE_H
gl.glViewport(0, 0, VP_W, VP_H)
t.enable()
self.render_road()
for geom in self.viewer.onetime_geoms:
geom.render()
t.disable()
self.render_indicators(WINDOW_W, WINDOW_H) # TODO: find out why the 2x factor is needed here
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
arr = np.frombuffer(image_data.data, dtype=np.uint8) # np.fromstring is deprecated; frombuffer reads the same bytes
arr = arr.reshape(VP_H, VP_W, 4)
arr = arr[::-1, :, 0:3]
if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video.
win.flip()
if mode=='human':
self.human_render = True
win.clear()
t = self.transform
gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
t.enable()
self.render_road()
for geom in self.viewer.onetime_geoms:
geom.render()
t.disable()
self.render_indicators(WINDOW_W, WINDOW_H)
win.flip()
self.viewer.onetime_geoms = []
return arr
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
def render_road(self):
gl.glBegin(gl.GL_QUADS)
gl.glColor4f(0.4, 0.8, 0.4, 1.0)
gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
gl.glColor4f(0.4, 0.9, 0.4, 1.0)
k = PLAYFIELD/20.0
for x in range(-20, 20, 2):
for y in range(-20, 20, 2):
gl.glVertex3f(k*x + k, k*y + 0, 0)
gl.glVertex3f(k*x + 0, k*y + 0, 0)
gl.glVertex3f(k*x + 0, k*y + k, 0)
gl.glVertex3f(k*x + k, k*y + k, 0)
for poly, color in self.road_poly:
gl.glColor4f(color[0], color[1], color[2], 1)
for p in poly:
gl.glVertex3f(p[0], p[1], 0)
gl.glEnd()
def render_indicators(self, W, H):
gl.glBegin(gl.GL_QUADS)
s = W/40.0
h = H/40.0
gl.glColor4f(0,0,0,1)
gl.glVertex3f(W, 0, 0)
gl.glVertex3f(W, 5*h, 0)
gl.glVertex3f(0, 5*h, 0)
gl.glVertex3f(0, 0, 0)
def vertical_ind(place, val, color):
gl.glColor4f(color[0], color[1], color[2], 1)
gl.glVertex3f((place+0)*s, h + h*val, 0)
gl.glVertex3f((place+1)*s, h + h*val, 0)
gl.glVertex3f((place+1)*s, h, 0)
gl.glVertex3f((place+0)*s, h, 0)
def horiz_ind(place, val, color):
gl.glColor4f(color[0], color[1], color[2], 1)
gl.glVertex3f((place+0)*s, 4*h , 0)
gl.glVertex3f((place+val)*s, 4*h, 0)
gl.glVertex3f((place+val)*s, 2*h, 0)
gl.glVertex3f((place+0)*s, 2*h, 0)
true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
vertical_ind(5, 0.02*true_speed, (1,1,1))
vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
gl.glEnd()
self.score_label.text = "%04i" % self.reward
self.score_label.draw()
if __name__=="__main__":
from pyglet.window import key
a = np.array( [0.0, 0.0, 0.0] )
def key_press(k, mod):
global restart
if k==0xff0d: restart = True
if k==key.LEFT: a[0] = -1.0
if k==key.RIGHT: a[0] = +1.0
if k==key.UP: a[1] = +1.0
if k==key.DOWN: a[2] = +0.8 # set 1.0 for the wheels to lock (zero rotation)
def key_release(k, mod):
if k==key.LEFT and a[0]==-1.0: a[0] = 0
if k==key.RIGHT and a[0]==+1.0: a[0] = 0
if k==key.UP: a[1] = 0
if k==key.DOWN: a[2] = 0
env = CarRacing()
env.render()
record_video = False
if record_video:
env.monitor.start('/tmp/video-test', force=True)
env.viewer.window.on_key_press = key_press
env.viewer.window.on_key_release = key_release
while True:
env.reset()
total_reward = 0.0
steps = 0
restart = False
while True:
s, r, done, info = env.step(a)
total_reward += r
if steps % 200 == 0 or done:
print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
#import matplotlib.pyplot as plt
#plt.imshow(s)
#plt.savefig("test.jpeg")
steps += 1
if not record_video: # Faster, but you can just as well call env.render() every time to play in a full window.
env.render()
if done or restart: break
env.close()

View File

@ -0,0 +1,420 @@
import sys, math
import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
import gym
from gym import spaces
from gym.utils import seeding, EzPickle
# Rocket trajectory optimization is a classic topic in Optimal Control.
#
# According to Pontryagin's maximum principle it's optimal to fire engine full throttle or
# turn it off. That's the reason this environment is OK to have discrete actions (engine on or off).
#
# Landing pad is always at coordinates (0,0). Coordinates are the first two numbers in state vector.
# Reward for moving from the top of the screen to landing pad and zero speed is about 100..140 points.
# If lander moves away from landing pad it loses reward back. Episode finishes if the lander crashes or
# comes to rest, receiving additional -100 or +100 points. Each leg ground contact is +10. Firing main
# engine is -0.3 points each frame. Solved is 200 points.
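# Rough tally for a good landing: 100..140 shaping + 2*10 leg contact + 100 rest
# bonus - roughly 30 of main-engine fuel, i.e. about 200 points or slightly above.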
#
# Landing outside landing pad is possible. Fuel is infinite, so an agent can learn to fly and then land
# on its first attempt. Please see source code for details.
#
# To see heuristic landing, run:
#
# python gym/envs/box2d/lunar_lander.py
#
# To play yourself, run:
#
# python examples/agents/keyboard_agent.py LunarLander-v2
#
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
FPS = 50
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
INITIAL_RANDOM = 1000.0 # Set 1500 to make game harder
LANDER_POLY =[
(-14,+17), (-17,0), (-17,-10),
(+17,-10), (+17,0), (+14,+17)
]
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
LEG_SPRING_TORQUE = 40
SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0
VIEWPORT_W = 600
VIEWPORT_H = 400
class ContactDetector(contactListener):
def __init__(self, env):
contactListener.__init__(self)
self.env = env
def BeginContact(self, contact):
if self.env.lander==contact.fixtureA.body or self.env.lander==contact.fixtureB.body:
self.env.game_over = True
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = True
def EndContact(self, contact):
for i in range(2):
if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
self.env.legs[i].ground_contact = False
class LunarLander(gym.Env, EzPickle):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : FPS
}
continuous = False
def __init__(self):
EzPickle.__init__(self)
self.seed()
self.viewer = None
self.world = Box2D.b2World()
self.moon = None
self.lander = None
self.particles = []
self.prev_reward = None
# useful range is -1 .. +1, but spikes can be higher
self.observation_space = spaces.Box(-np.inf, np.inf, shape=(8,), dtype=np.float32)
if self.continuous:
# Action is two floats [main engine, left-right engines].
# Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
# Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
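# e.g. action = [+0.6, -0.8] fires the main engine at (0.6+1)/2 = 80% power and the left engine at 80% power.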
self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32)
else:
# Nop, fire left engine, main engine, right engine
self.action_space = spaces.Discrete(4)
self.reset()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _destroy(self):
if not self.moon: return
self.world.contactListener = None
self._clean_particles(True)
self.world.DestroyBody(self.moon)
self.moon = None
self.world.DestroyBody(self.lander)
self.lander = None
self.world.DestroyBody(self.legs[0])
self.world.DestroyBody(self.legs[1])
def reset(self):
self._destroy()
self.world.contactListener_keepref = ContactDetector(self)
self.world.contactListener = self.world.contactListener_keepref
self.game_over = False
self.prev_shaping = None
W = VIEWPORT_W/SCALE
H = VIEWPORT_H/SCALE
# terrain
CHUNKS = 11
height = self.np_random.uniform(0, H/2, size=(CHUNKS+1,) )
chunk_x = [W/(CHUNKS-1)*i for i in range(CHUNKS)]
self.helipad_x1 = chunk_x[CHUNKS//2-1]
self.helipad_x2 = chunk_x[CHUNKS//2+1]
self.helipad_y = H/4
height[CHUNKS//2-2] = self.helipad_y
height[CHUNKS//2-1] = self.helipad_y
height[CHUNKS//2+0] = self.helipad_y
height[CHUNKS//2+1] = self.helipad_y
height[CHUNKS//2+2] = self.helipad_y
smooth_y = [0.33*(height[i-1] + height[i+0] + height[i+1]) for i in range(CHUNKS)]
self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) )
self.sky_polys = []
for i in range(CHUNKS-1):
p1 = (chunk_x[i], smooth_y[i])
p2 = (chunk_x[i+1], smooth_y[i+1])
self.moon.CreateEdgeFixture(
vertices=[p1,p2],
density=0,
friction=0.1)
self.sky_polys.append( [p1, p2, (p2[0],H), (p1[0],H)] )
self.moon.color1 = (0.0,0.0,0.0)
self.moon.color2 = (0.0,0.0,0.0)
initial_y = VIEWPORT_H/SCALE
self.lander = self.world.CreateDynamicBody(
position = (VIEWPORT_W/SCALE/2, initial_y),
angle=0.0,
fixtures = fixtureDef(
shape=polygonShape(vertices=[ (x/SCALE,y/SCALE) for x,y in LANDER_POLY ]),
density=5.0,
friction=0.1,
categoryBits=0x0010,
maskBits=0x001, # collide only with ground
restitution=0.0) # 0.99 bouncy
)
self.lander.color1 = (0.5,0.4,0.9)
self.lander.color2 = (0.3,0.3,0.5)
self.lander.ApplyForceToCenter( (
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM)
), True)
self.legs = []
for i in [-1,+1]:
leg = self.world.CreateDynamicBody(
position = (VIEWPORT_W/SCALE/2 - i*LEG_AWAY/SCALE, initial_y),
angle = (i*0.05),
fixtures = fixtureDef(
shape=polygonShape(box=(LEG_W/SCALE, LEG_H/SCALE)),
density=1.0,
restitution=0.0,
categoryBits=0x0020,
maskBits=0x001)
)
leg.ground_contact = False
leg.color1 = (0.5,0.4,0.9)
leg.color2 = (0.3,0.3,0.5)
rjd = revoluteJointDef(
bodyA=self.lander,
bodyB=leg,
localAnchorA=(0, 0),
localAnchorB=(i*LEG_AWAY/SCALE, LEG_DOWN/SCALE),
enableMotor=True,
enableLimit=True,
maxMotorTorque=LEG_SPRING_TORQUE,
motorSpeed=+0.3*i # low enough not to jump back into the sky
)
if i==-1:
rjd.lowerAngle = +0.9 - 0.5 # The most esoteric numbers here: the angles within which the legs are free to travel
rjd.upperAngle = +0.9
else:
rjd.lowerAngle = -0.9
rjd.upperAngle = -0.9 + 0.5
leg.joint = self.world.CreateJoint(rjd)
self.legs.append(leg)
self.drawlist = [self.lander] + self.legs
return self.step(np.array([0,0]) if self.continuous else 0)[0]
def _create_particle(self, mass, x, y, ttl):
p = self.world.CreateDynamicBody(
position = (x,y),
angle=0.0,
fixtures = fixtureDef(
shape=circleShape(radius=2/SCALE, pos=(0,0)),
density=mass,
friction=0.1,
categoryBits=0x0100,
maskBits=0x001, # collide only with ground
restitution=0.3)
)
p.ttl = ttl
self.particles.append(p)
self._clean_particles(False)
return p
def _clean_particles(self, all):
while self.particles and (all or self.particles[0].ttl<0):
self.world.DestroyBody(self.particles.pop(0))
def step(self, action):
if self.continuous:
action = np.clip(action, -1, +1).astype(np.float32)
else:
assert self.action_space.contains(action), "%r (%s) invalid " % (action, type(action))
# Engines
tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
side = (-tip[1], tip[0])
dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]
m_power = 0.0
if (self.continuous and action[0] > 0.0) or (not self.continuous and action==2):
# Main engine
if self.continuous:
m_power = (np.clip(action[0], 0.0,1.0) + 1.0)*0.5 # 0.5..1.0
assert m_power>=0.5 and m_power <= 1.0
else:
m_power = 1.0
ox = tip[0]*(4/SCALE + 2*dispersion[0]) + side[0]*dispersion[1] # 4 is move a bit downwards, +-2 for randomness
oy = -tip[1]*(4/SCALE + 2*dispersion[0]) - side[1]*dispersion[1]
impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
p = self._create_particle(3.5, impulse_pos[0], impulse_pos[1], m_power) # particles are just a decoration, 3.5 is here to make particle speed adequate
p.ApplyLinearImpulse( ( ox*MAIN_ENGINE_POWER*m_power, oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
self.lander.ApplyLinearImpulse( (-ox*MAIN_ENGINE_POWER*m_power, -oy*MAIN_ENGINE_POWER*m_power), impulse_pos, True)
s_power = 0.0
if (self.continuous and np.abs(action[1]) > 0.5) or (not self.continuous and action in [1,3]):
# Orientation engines
if self.continuous:
direction = np.sign(action[1])
s_power = np.clip(np.abs(action[1]), 0.5,1.0)
assert s_power>=0.5 and s_power <= 1.0
else:
direction = action-2
s_power = 1.0
ox = tip[0]*dispersion[0] + side[0]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
oy = -tip[1]*dispersion[0] - side[1]*(3*dispersion[1]+direction*SIDE_ENGINE_AWAY/SCALE)
impulse_pos = (self.lander.position[0] + ox - tip[0]*17/SCALE, self.lander.position[1] + oy + tip[1]*SIDE_ENGINE_HEIGHT/SCALE)
p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
p.ApplyLinearImpulse( ( ox*SIDE_ENGINE_POWER*s_power, oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
self.lander.ApplyLinearImpulse( (-ox*SIDE_ENGINE_POWER*s_power, -oy*SIDE_ENGINE_POWER*s_power), impulse_pos, True)
self.world.Step(1.0/FPS, 6*30, 2*30)
pos = self.lander.position
vel = self.lander.linearVelocity
state = [
(pos.x - VIEWPORT_W/SCALE/2) / (VIEWPORT_W/SCALE/2),
(pos.y - (self.helipad_y+LEG_DOWN/SCALE)) / (VIEWPORT_H/SCALE/2),
vel.x*(VIEWPORT_W/SCALE/2)/FPS,
vel.y*(VIEWPORT_H/SCALE/2)/FPS,
self.lander.angle,
20.0*self.lander.angularVelocity/FPS,
1.0 if self.legs[0].ground_contact else 0.0,
1.0 if self.legs[1].ground_contact else 0.0
]
assert len(state)==8
reward = 0
shaping = \
- 100*np.sqrt(state[0]*state[0] + state[1]*state[1]) \
- 100*np.sqrt(state[2]*state[2] + state[3]*state[3]) \
- 100*abs(state[4]) + 10*state[6] + 10*state[7] # and ten points for each leg in contact; the idea is that if you
# lose contact again after landing, you get negative reward
if self.prev_shaping is not None:
reward = shaping - self.prev_shaping
self.prev_shaping = shaping
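# Rewarding the change in shaping (a potential-based term) densifies the signal
# without changing which trajectories are optimal.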
reward -= m_power*0.30 # less fuel spent is better, about -30 for heuristic landing
reward -= s_power*0.03
done = False
if self.game_over or abs(state[0]) >= 1.0:
done = True
reward = -100
if not self.lander.awake:
done = True
reward = +100
return np.array(state, dtype=np.float32), reward, done, {}
def render(self, mode='human'):
from gym.envs.classic_control import rendering
if self.viewer is None:
self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
self.viewer.set_bounds(0, VIEWPORT_W/SCALE, 0, VIEWPORT_H/SCALE)
for obj in self.particles:
obj.ttl -= 0.15
obj.color1 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
obj.color2 = (max(0.2,0.2+obj.ttl), max(0.2,0.5*obj.ttl), max(0.2,0.5*obj.ttl))
self._clean_particles(False)
for p in self.sky_polys:
self.viewer.draw_polygon(p, color=(0,0,0))
for obj in self.particles + self.drawlist:
for f in obj.fixtures:
trans = f.body.transform
if type(f.shape) is circleShape:
t = rendering.Transform(translation=trans*f.shape.pos)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color1).add_attr(t)
self.viewer.draw_circle(f.shape.radius, 20, color=obj.color2, filled=False, linewidth=2).add_attr(t)
else:
path = [trans*v for v in f.shape.vertices]
self.viewer.draw_polygon(path, color=obj.color1)
path.append(path[0])
self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
for x in [self.helipad_x1, self.helipad_x2]:
flagy1 = self.helipad_y
flagy2 = flagy1 + 50/SCALE
self.viewer.draw_polyline( [(x, flagy1), (x, flagy2)], color=(1,1,1) )
self.viewer.draw_polygon( [(x, flagy2), (x, flagy2-10/SCALE), (x+25/SCALE, flagy2-5/SCALE)], color=(0.8,0.8,0) )
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer is not None:
self.viewer.close()
self.viewer = None
class LunarLanderContinuous(LunarLander):
continuous = True
def heuristic(env, s):
# Heuristic for:
# 1. Testing.
# 2. Demonstration rollout.
angle_targ = s[0]*0.5 + s[2]*1.0 # angle should point towards center (s[0] is horizontal coordinate, s[2] hor speed)
if angle_targ > 0.4: angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
if angle_targ < -0.4: angle_targ = -0.4
hover_targ = 0.55*np.abs(s[0]) # target y should be proportional to horizontal offset
# PID controller: s[4] angle, s[5] angularSpeed
angle_todo = (angle_targ - s[4])*0.5 - (s[5])*1.0
#print("angle_targ=%0.2f, angle_todo=%0.2f" % (angle_targ, angle_todo))
# PID controller: s[1] vertical coordinate s[3] vertical speed
hover_todo = (hover_targ - s[1])*0.5 - (s[3])*0.5
#print("hover_targ=%0.2f, hover_todo=%0.2f" % (hover_targ, hover_todo))
if s[6] or s[7]: # legs have contact
angle_todo = 0
hover_todo = -(s[3])*0.5 # override to reduce fall speed, that's all we need after contact
if env.continuous:
a = np.array( [hover_todo*20 - 1, -angle_todo*20] )
a = np.clip(a, -1, +1)
else:
a = 0
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2
elif angle_todo < -0.05: a = 3
elif angle_todo > +0.05: a = 1
return a
def demo_heuristic_lander(env, seed=None, render=False):
env.seed(seed)
total_reward = 0
steps = 0
s = env.reset()
while True:
a = heuristic(env, s)
s, r, done, info = env.step(a)
total_reward += r
if render:
still_open = env.render()
if still_open == False: break
if steps % 20 == 0 or done:
print("observations:", " ".join(["{:+0.2f}".format(x) for x in s]))
print("step {} total_reward {:+0.2f}".format(steps, total_reward))
steps += 1
if done: break
return total_reward
if __name__ == '__main__':
demo_heuristic_lander(LunarLander(), render=True)

View File

@ -0,0 +1,13 @@
from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
def test_lunar_lander():
_test_lander(LunarLander(), seed=0)
def test_lunar_lander_continuous():
_test_lander(LunarLanderContinuous(), seed=0)
def _test_lander(env, seed=None, render=False):
total_reward = demo_heuristic_lander(env, seed=seed, render=render)
assert total_reward > 100

View File

@ -0,0 +1,6 @@
from gym.envs.classic_control.cartpole import CartPoleEnv
from gym.envs.classic_control.mountain_car import MountainCarEnv
from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
from gym.envs.classic_control.pendulum import PendulumEnv
from gym.envs.classic_control.acrobot import AcrobotEnv

View File

@ -0,0 +1,305 @@
"""classic Acrobot task"""
import numpy as np
from numpy import sin, cos, pi
from gym import core, spaces
from gym.utils import seeding
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
"William Dabney", "Jonathan P. How"]
__license__ = "BSD 3-Clause"
__author__ = "Christoph Dann <cdann@cdann.de>"
# SOURCE:
# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
class AcrobotEnv(core.Env):
"""
Acrobot is a 2-link pendulum with only the second joint actuated
Initially, both links point downwards. The goal is to swing the
end-effector at a height at least the length of one link above the base.
Both links can swing freely and can pass by each other, i.e., they don't
collide when they have the same angle.
**STATE:**
The state consists of the sin() and cos() of the two rotational joint
angles and the joint angular velocities :
[cos(theta1) sin(theta1) cos(theta2) sin(theta2) thetaDot1 thetaDot2].
For the first link, an angle of 0 corresponds to the link pointing downwards.
The angle of the second link is relative to the angle of the first link.
An angle of 0 corresponds to the two links being aligned with each other.
A state of [1, 0, 1, 0, ..., ...] means that both links point downwards.
**ACTIONS:**
The action is either applying +1, 0 or -1 torque on the joint between
the two pendulum links.
.. note::
The dynamics equations were missing some terms in the NIPS paper which
are present in the book. R. Sutton confirmed in personal correspondence
that the experimental results shown in the paper and the book were
generated with the equations shown in the book.
However, there is the option to run the domain with the paper equations
by setting book_or_nips = 'nips'
**REFERENCE:**
.. seealso::
R. Sutton: Generalization in Reinforcement Learning:
Successful Examples Using Sparse Coarse Coding (NIPS 1996)
.. seealso::
R. Sutton and A. G. Barto:
Reinforcement learning: An introduction.
Cambridge: MIT press, 1998.
.. warning::
This version of the domain uses the Runge-Kutta method for integrating
the system dynamics and is more realistic, but also considerably harder
than the original version which employs Euler integration,
see the AcrobotLegacy class.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 15
}
dt = .2
LINK_LENGTH_1 = 1. # [m]
LINK_LENGTH_2 = 1. # [m]
LINK_MASS_1 = 1. #: [kg] mass of link 1
LINK_MASS_2 = 1. #: [kg] mass of link 2
LINK_COM_POS_1 = 0.5 #: [m] position of the center of mass of link 1
LINK_COM_POS_2 = 0.5 #: [m] position of the center of mass of link 2
LINK_MOI = 1. #: moments of inertia for both links
MAX_VEL_1 = 4 * np.pi
MAX_VEL_2 = 9 * np.pi
AVAIL_TORQUE = [-1., 0., +1]
torque_noise_max = 0.
#: use dynamics equations from the nips paper or the book
book_or_nips = "book"
action_arrow = None
domain_fig = None
actions_num = 3
def __init__(self):
self.viewer = None
high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
self.action_space = spaces.Discrete(3)
self.state = None
self.seed()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def reset(self):
self.state = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
return self._get_ob()
def step(self, a):
s = self.state
torque = self.AVAIL_TORQUE[a]
# Add noise to the force action
if self.torque_noise_max > 0:
torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)
# Now, augment the state with our force action so it can be passed to
# _dsdt
s_augmented = np.append(s, torque)
ns = rk4(self._dsdt, s_augmented, [0, self.dt])
# only care about final timestep of integration returned by integrator
ns = ns[-1]
ns = ns[:4] # omit action
# ODEINT IS TOO SLOW!
# ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
# self.s_continuous = ns_continuous[-1] # We only care about the state
# at the ''final timestep'', self.dt
ns[0] = wrap(ns[0], -pi, pi)
ns[1] = wrap(ns[1], -pi, pi)
ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
self.state = ns
terminal = self._terminal()
reward = -1. if not terminal else 0.
return (self._get_ob(), reward, terminal, {})
def _get_ob(self):
s = self.state
return np.array([cos(s[0]), sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]])
def _terminal(self):
s = self.state
return bool(-np.cos(s[0]) - np.cos(s[1] + s[0]) > 1.)
def _dsdt(self, s_augmented, t):
m1 = self.LINK_MASS_1
m2 = self.LINK_MASS_2
l1 = self.LINK_LENGTH_1
lc1 = self.LINK_COM_POS_1
lc2 = self.LINK_COM_POS_2
I1 = self.LINK_MOI
I2 = self.LINK_MOI
g = 9.8
a = s_augmented[-1]
s = s_augmented[:-1]
theta1 = s[0]
theta2 = s[1]
dtheta1 = s[2]
dtheta2 = s[3]
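# d1, d2 are entries of the inertia matrix; phi1, phi2 collect the Coriolis,
# centrifugal and gravity terms of the standard two-link manipulator equations.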
d1 = m1 * lc1 ** 2 + m2 * \
(l1 ** 2 + lc2 ** 2 + 2 * l1 * lc2 * np.cos(theta2)) + I1 + I2
d2 = m2 * (lc2 ** 2 + l1 * lc2 * np.cos(theta2)) + I2
phi2 = m2 * lc2 * g * np.cos(theta1 + theta2 - np.pi / 2.)
phi1 = - m2 * l1 * lc2 * dtheta2 ** 2 * np.sin(theta2) \
- 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * np.sin(theta2) \
+ (m1 * lc1 + m2 * l1) * g * np.cos(theta1 - np.pi / 2) + phi2
if self.book_or_nips == "nips":
# the following line is consistent with the description in the
# paper
ddtheta2 = (a + d2 / d1 * phi1 - phi2) / \
(m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
else:
# the following line is consistent with the java implementation and the
# book
ddtheta2 = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1 ** 2 * np.sin(theta2) - phi2) \
/ (m2 * lc2 ** 2 + I2 - d2 ** 2 / d1)
ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
return (dtheta1, dtheta2, ddtheta1, ddtheta2, 0.)
def render(self, mode='human'):
from gym.envs.classic_control import rendering
s = self.state
if self.viewer is None:
self.viewer = rendering.Viewer(500,500)
bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2 # 2.2 for default
self.viewer.set_bounds(-bound,bound,-bound,bound)
if s is None: return None
p1 = [-self.LINK_LENGTH_1 *
np.cos(s[0]), self.LINK_LENGTH_1 * np.sin(s[0])]
p2 = [p1[0] - self.LINK_LENGTH_2 * np.cos(s[0] + s[1]),
p1[1] + self.LINK_LENGTH_2 * np.sin(s[0] + s[1])]
xys = np.array([[0,0], p1, p2])[:,::-1]
thetas = [s[0]-np.pi/2, s[0]+s[1]-np.pi/2]
link_lengths = [self.LINK_LENGTH_1, self.LINK_LENGTH_2]
self.viewer.draw_line((-2.2, 1), (2.2, 1))
for ((x,y),th,llen) in zip(xys, thetas, link_lengths):
l,r,t,b = 0, llen, .1, -.1
jtransform = rendering.Transform(rotation=th, translation=(x,y))
link = self.viewer.draw_polygon([(l,b), (l,t), (r,t), (r,b)])
link.add_attr(jtransform)
link.set_color(0,.8, .8)
circ = self.viewer.draw_circle(.1)
circ.set_color(.8, .8, 0)
circ.add_attr(jtransform)
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
def wrap(x, m, M):
"""
:param x: a scalar
:param m: minimum possible value in range
:param M: maximum possible value in range
Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which
truncates, ``wrap()`` wraps x around the coordinate system defined by m,M.\n
For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.
"""
diff = M - m
while x > M:
x = x - diff
while x < m:
x = x + diff
return x
def bound(x, m, M=None):
"""
:param x: scalar
Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].
"""
if M is None:
M = m[1]
m = m[0]
# bound x between min (m) and Max (M)
return min(max(x, m), M)
def rk4(derivs, y0, t, *args, **kwargs):
"""
Integrate 1D or ND system of ODEs using 4-th order Runge-Kutta.
This is a toy implementation which may be useful if you find
yourself stranded on a system w/o scipy. Otherwise use
:func:`scipy.integrate`.
*y0*
initial state vector
*t*
sample times
*derivs*
returns the derivative of the system and has the
signature ``dy = derivs(yi, ti)``
*args*
additional arguments passed to the derivative function
*kwargs*
additional keyword arguments passed to the derivative function
Example 1 ::
## 2D system
def derivs6(x,t):
d1 = x[0] + 2*x[1]
d2 = -3*x[0] + 4*x[1]
return (d1, d2)
dt = 0.0005
t = arange(0.0, 2.0, dt)
y0 = (1,2)
yout = rk4(derivs6, y0, t)
Example 2::
## 1D system
alpha = 2
def derivs(x,t):
return -alpha*x + exp(-t)
y0 = 1
yout = rk4(derivs, y0, t)
If you have access to scipy, you should probably be using the
scipy.integrate tools rather than this function.
"""
try:
Ny = len(y0)
except TypeError:
yout = np.zeros((len(t),), float) # np.float_ was removed in NumPy 2.0; plain float is equivalent
else:
yout = np.zeros((len(t), Ny), float)
yout[0] = y0
for i in np.arange(len(t) - 1):
thist = t[i]
dt = t[i + 1] - thist
dt2 = dt / 2.0
y0 = yout[i]
k1 = np.asarray(derivs(y0, thist, *args, **kwargs))
k2 = np.asarray(derivs(y0 + dt2 * k1, thist + dt2, *args, **kwargs))
k3 = np.asarray(derivs(y0 + dt2 * k2, thist + dt2, *args, **kwargs))
k4 = np.asarray(derivs(y0 + dt * k3, thist + dt, *args, **kwargs))
yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
return yout
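def _rk4_selfcheck():
# A minimal sketch, not part of the original file: integrate dy/dt = -y from
# y0 = 1 and compare against the exact solution exp(-t).
t = np.linspace(0.0, 1.0, 101)
yout = rk4(lambda y, ti: -y, 1.0, t)
assert np.allclose(yout, np.exp(-t), atol=1e-6)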

Binary file not shown.


View File

@ -0,0 +1,193 @@
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
"""
import math
import gym
from gym import spaces, logger
from gym.utils import seeding
import numpy as np
class CartPoleEnv(gym.Env):
"""
Description:
A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum starts upright, and the goal is to prevent it from falling over by increasing and reducing the cart's velocity.
Source:
This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson
Observation:
Type: Box(4)
Num Observation Min Max
0 Cart Position -4.8 4.8
1 Cart Velocity -Inf Inf
2 Pole Angle -24° 24°
3 Pole Velocity At Tip -Inf Inf
Actions:
Type: Discrete(2)
Num Action
0 Push cart to the left
1 Push cart to the right
Note: The amount the velocity is reduced or increased is not fixed as it depends on the angle the pole is pointing. This is because the center of gravity of the pole increases the amount of energy needed to move the cart underneath it
Reward:
Reward is 1 for every step taken, including the termination step
Starting State:
All observations are assigned a uniform random value between ±0.05
Episode Termination:
Pole Angle is more than ±12°
Cart Position is more than ±2.4 (center of the cart reaches the edge of the display)
Episode length is greater than 200
Solved Requirements:
Considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.
"""
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
def __init__(self):
self.gravity = 9.8
self.masscart = 1.0
self.masspole = 0.1
self.total_mass = (self.masspole + self.masscart)
self.length = 0.5 # actually half the pole's length
self.polemass_length = (self.masspole * self.length)
self.force_mag = 10.0
self.tau = 0.02 # seconds between state updates
self.kinematics_integrator = 'euler'
# Angle at which to fail the episode
self.theta_threshold_radians = 12 * 2 * math.pi / 360
self.x_threshold = 2.4
# Angle limit set to 2 * theta_threshold_radians so failing observation is still within bounds
high = np.array([
self.x_threshold * 2,
np.finfo(np.float32).max,
self.theta_threshold_radians * 2,
np.finfo(np.float32).max])
self.action_space = spaces.Discrete(2)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)
self.seed()
self.viewer = None
self.state = None
self.steps_beyond_done = None
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self, action):
assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
state = self.state
x, x_dot, theta, theta_dot = state
force = self.force_mag if action==1 else -self.force_mag
costheta = math.cos(theta)
sintheta = math.sin(theta)
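# The accelerations below follow Barto, Sutton & Anderson (1983); see also Florian (2007),
# "Correct equations for the dynamics of the cart-pole system", for a full derivation.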
temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass
thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass))
xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
if self.kinematics_integrator == 'euler':
x = x + self.tau * x_dot
x_dot = x_dot + self.tau * xacc
theta = theta + self.tau * theta_dot
theta_dot = theta_dot + self.tau * thetaacc
else: # semi-implicit euler
x_dot = x_dot + self.tau * xacc
x = x + self.tau * x_dot
theta_dot = theta_dot + self.tau * thetaacc
theta = theta + self.tau * theta_dot
self.state = (x,x_dot,theta,theta_dot)
done = x < -self.x_threshold \
or x > self.x_threshold \
or theta < -self.theta_threshold_radians \
or theta > self.theta_threshold_radians
done = bool(done)
if not done:
reward = 1.0
elif self.steps_beyond_done is None:
# Pole just fell!
self.steps_beyond_done = 0
reward = 1.0
else:
if self.steps_beyond_done == 0:
logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
self.steps_beyond_done += 1
reward = 0.0
return np.array(self.state), reward, done, {}
def reset(self):
self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
self.steps_beyond_done = None
return np.array(self.state)
def render(self, mode='human'):
screen_width = 600
screen_height = 400
world_width = self.x_threshold*2
scale = screen_width/world_width
carty = 100 # TOP OF CART
polewidth = 10.0
polelen = scale * (2 * self.length)
cartwidth = 50.0
cartheight = 30.0
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
l,r,t,b = -cartwidth/2, cartwidth/2, cartheight/2, -cartheight/2
axleoffset = cartheight/4.0
cart = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
self.carttrans = rendering.Transform()
cart.add_attr(self.carttrans)
self.viewer.add_geom(cart)
l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
pole = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
pole.set_color(.8,.6,.4)
self.poletrans = rendering.Transform(translation=(0, axleoffset))
pole.add_attr(self.poletrans)
pole.add_attr(self.carttrans)
self.viewer.add_geom(pole)
self.axle = rendering.make_circle(polewidth/2)
self.axle.add_attr(self.poletrans)
self.axle.add_attr(self.carttrans)
self.axle.set_color(.5,.5,.8)
self.viewer.add_geom(self.axle)
self.track = rendering.Line((0,carty), (screen_width,carty))
self.track.set_color(0,0,0)
self.viewer.add_geom(self.track)
self._pole_geom = pole
if self.state is None: return None
# Edit the pole polygon vertex
pole = self._pole_geom
l,r,t,b = -polewidth/2,polewidth/2,polelen-polewidth/2,-polewidth/2
pole.v = [(l,b), (l,t), (r,t), (r,b)]
x = self.state
cartx = x[0]*scale+screen_width/2.0 # MIDDLE OF CART
self.carttrans.set_translation(cartx, carty)
self.poletrans.set_rotation(-x[2])
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
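
# Hypothetical usage sketch (not part of the original file): the logger
# warning in step() encodes the episode protocol -- once step() returns
# done=True, call reset() before stepping again. Assumes the gym-standard
# action_space attribute defined in __init__ above.
if __name__ == '__main__':
    env = CartPoleEnv()
    obs = env.reset()
    for _ in range(1000):
        obs, reward, done, _ = env.step(env.action_space.sample())
        if done:                 # pole fell or the cart left the track
            obs = env.reset()    # reset before calling step() again
    env.close()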

View File

@ -0,0 +1,148 @@
# -*- coding: utf-8 -*-
"""
@author: Olivier Sigaud
A merge between two sources:
* Adaptation of the MountainCar Environment from the "FAReinforcement" library
of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia
* the OpenAI/gym MountainCar environment
itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
class Continuous_MountainCarEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
def __init__(self):
self.min_action = -1.0
self.max_action = 1.0
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
self.power = 0.0015
self.low_state = np.array([self.min_position, -self.max_speed])
self.high_state = np.array([self.max_position, self.max_speed])
self.viewer = None
self.action_space = spaces.Box(low=self.min_action, high=self.max_action,
shape=(1,), dtype=np.float32)
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
dtype=np.float32)
self.seed()
self.reset()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self, action):
position = self.state[0]
velocity = self.state[1]
force = min(max(action[0], -1.0), 1.0)
        velocity += force*self.power - 0.0025 * math.cos(3*position)
if (velocity > self.max_speed): velocity = self.max_speed
if (velocity < -self.max_speed): velocity = -self.max_speed
position += velocity
if (position > self.max_position): position = self.max_position
if (position < self.min_position): position = self.min_position
if (position==self.min_position and velocity<0): velocity = 0
done = bool(position >= self.goal_position)
reward = 0
if done:
reward = 100.0
        reward -= math.pow(action[0], 2)*0.1
self.state = np.array([position, velocity])
return self.state, reward, done, {}
def reset(self):
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
return np.array(self.state)
# def get_state(self):
# return self.state
def _height(self, xs):
return np.sin(3 * xs)*.45+.55
def render(self, mode='human'):
screen_width = 600
screen_height = 400
world_width = self.max_position - self.min_position
scale = screen_width/world_width
carwidth=40
carheight=20
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs-self.min_position)*scale, ys*scale))
self.track = rendering.make_polyline(xys)
self.track.set_linewidth(4)
self.viewer.add_geom(self.track)
clearance = 10
l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
car.add_attr(rendering.Transform(translation=(0, clearance)))
self.cartrans = rendering.Transform()
car.add_attr(self.cartrans)
self.viewer.add_geom(car)
frontwheel = rendering.make_circle(carheight/2.5)
frontwheel.set_color(.5, .5, .5)
frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
frontwheel.add_attr(self.cartrans)
self.viewer.add_geom(frontwheel)
backwheel = rendering.make_circle(carheight/2.5)
backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
backwheel.add_attr(self.cartrans)
backwheel.set_color(.5, .5, .5)
self.viewer.add_geom(backwheel)
flagx = (self.goal_position-self.min_position)*scale
flagy1 = self._height(self.goal_position)*scale
flagy2 = flagy1 + 50
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
self.viewer.add_geom(flagpole)
flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
flag.set_color(.8,.8,0)
self.viewer.add_geom(flag)
pos = self.state[0]
self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
self.cartrans.set_rotation(math.cos(3 * pos))
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
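
# Hypothetical usage sketch (not part of the original file): the action is a
# continuous force in [-1, 1]; each step charges a control cost of
# 0.1*action^2 against the +100 bonus for reaching the goal.
if __name__ == '__main__':
    env = Continuous_MountainCarEnv()
    obs = env.reset()
    total, done, steps = 0.0, False, 0
    while not done and steps < 10000:   # cap: random actions may never reach the goal
        obs, reward, done, _ = env.step(env.action_space.sample())
        total += reward
        steps += 1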

View File

@ -0,0 +1,121 @@
"""
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
class MountainCarEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
def __init__(self):
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.5
self.low = np.array([self.min_position, -self.max_speed])
self.high = np.array([self.max_position, self.max_speed])
self.viewer = None
self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
self.seed()
self.reset()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self, action):
assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
position, velocity = self.state
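        # Discretized dynamics: actions {0, 1, 2} apply forces {-0.001, 0, +0.001}
        # via (action-1)*0.001, plus a slope term -0.0025*cos(3*position).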
velocity += (action-1)*0.001 + math.cos(3*position)*(-0.0025)
velocity = np.clip(velocity, -self.max_speed, self.max_speed)
position += velocity
position = np.clip(position, self.min_position, self.max_position)
if (position==self.min_position and velocity<0): velocity = 0
done = bool(position >= self.goal_position)
reward = -1.0
self.state = (position, velocity)
return np.array(self.state), reward, done, {}
def reset(self):
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
return np.array(self.state)
def _height(self, xs):
return np.sin(3 * xs)*.45+.55
def render(self, mode='human'):
screen_width = 600
screen_height = 400
world_width = self.max_position - self.min_position
scale = screen_width/world_width
carwidth=40
carheight=20
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs-self.min_position)*scale, ys*scale))
self.track = rendering.make_polyline(xys)
self.track.set_linewidth(4)
self.viewer.add_geom(self.track)
clearance = 10
l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
car.add_attr(rendering.Transform(translation=(0, clearance)))
self.cartrans = rendering.Transform()
car.add_attr(self.cartrans)
self.viewer.add_geom(car)
frontwheel = rendering.make_circle(carheight/2.5)
frontwheel.set_color(.5, .5, .5)
frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
frontwheel.add_attr(self.cartrans)
self.viewer.add_geom(frontwheel)
backwheel = rendering.make_circle(carheight/2.5)
backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
backwheel.add_attr(self.cartrans)
backwheel.set_color(.5, .5, .5)
self.viewer.add_geom(backwheel)
flagx = (self.goal_position-self.min_position)*scale
flagy1 = self._height(self.goal_position)*scale
flagy2 = flagy1 + 50
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
self.viewer.add_geom(flagpole)
flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
flag.set_color(.8,.8,0)
self.viewer.add_geom(flag)
pos = self.state[0]
self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
self.cartrans.set_rotation(math.cos(3 * pos))
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None

View File

@ -0,0 +1,90 @@
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
class PendulumEnv(gym.Env):
metadata = {
'render.modes' : ['human', 'rgb_array'],
'video.frames_per_second' : 30
}
def __init__(self):
self.max_speed=8
self.max_torque=2.
self.dt=.05
self.viewer = None
high = np.array([1., 1., self.max_speed])
self.action_space = spaces.Box(low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32)
self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)
self.seed()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self,u):
th, thdot = self.state # th := theta
g = 10.
m = 1.
l = 1.
dt = self.dt
u = np.clip(u, -self.max_torque, self.max_torque)[0]
self.last_u = u # for rendering
costs = angle_normalize(th)**2 + .1*thdot**2 + .001*(u**2)
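        # Pendulum ODE, discretized with semi-implicit Euler:
        # thetaacc = -3g/(2l)*sin(th + pi) + 3/(m*l^2)*u, where the '+ pi'
        # makes th = 0 the upright position (sin(th + pi) = -sin(th)).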
newthdot = thdot + (-3*g/(2*l) * np.sin(th + np.pi) + 3./(m*l**2)*u) * dt
newth = th + newthdot*dt
newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) #pylint: disable=E1111
self.state = np.array([newth, newthdot])
return self._get_obs(), -costs, False, {}
def reset(self):
high = np.array([np.pi, 1])
self.state = self.np_random.uniform(low=-high, high=high)
self.last_u = None
return self._get_obs()
def _get_obs(self):
theta, thetadot = self.state
return np.array([np.cos(theta), np.sin(theta), thetadot])
def render(self, mode='human'):
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(500,500)
self.viewer.set_bounds(-2.2,2.2,-2.2,2.2)
rod = rendering.make_capsule(1, .2)
rod.set_color(.8, .3, .3)
self.pole_transform = rendering.Transform()
rod.add_attr(self.pole_transform)
self.viewer.add_geom(rod)
axle = rendering.make_circle(.05)
axle.set_color(0,0,0)
self.viewer.add_geom(axle)
fname = path.join(path.dirname(__file__), "assets/clockwise.png")
self.img = rendering.Image(fname, 1., 1.)
self.imgtrans = rendering.Transform()
self.img.add_attr(self.imgtrans)
self.viewer.add_onetime(self.img)
self.pole_transform.set_rotation(self.state[0] + np.pi/2)
        if self.last_u is not None:  # plain truthiness would skip a torque of exactly 0
self.imgtrans.scale = (-self.last_u/2, np.abs(self.last_u)/2)
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
def angle_normalize(x):
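    # Wrap an angle in radians into the interval [-pi, pi).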
return (((x+np.pi) % (2*np.pi)) - np.pi)
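
# Hypothetical sketch (not part of the original file): _get_obs() encodes the
# angle as (cos(theta), sin(theta), thetadot) so that 0 and 2*pi look
# identical to a learner; arctan2 recovers the scalar angle.
if __name__ == '__main__':
    env = PendulumEnv()
    obs = env.reset()
    theta = np.arctan2(obs[1], obs[0])   # back in [-pi, pi]
    theta_dot = obs[2]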

View File

@ -0,0 +1,360 @@
"""
2D rendering framework
"""
from __future__ import division
import os
import six
import sys
if "Apple" in sys.version:
if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
# (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
from gym.utils import reraise
from gym import error
try:
import pyglet
except ImportError as e:
reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.")
try:
from pyglet.gl import *
except ImportError as e:
    reraise(prefix="Error occurred while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python <your_script.py>'")
import math
import numpy as np
RAD2DEG = 57.29577951308232
def get_display(spec):
"""Convert a display specification (such as :0) into an actual Display
object.
Pyglet only supports multiple Displays on Linux.
"""
if spec is None:
return None
elif isinstance(spec, six.string_types):
return pyglet.canvas.Display(spec)
else:
raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
class Viewer(object):
def __init__(self, width, height, display=None):
display = get_display(display)
self.width = width
self.height = height
self.window = pyglet.window.Window(width=width, height=height, display=display)
self.window.on_close = self.window_closed_by_user
self.isopen = True
self.geoms = []
self.onetime_geoms = []
self.transform = Transform()
glEnable(GL_BLEND)
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
def close(self):
self.window.close()
def window_closed_by_user(self):
self.isopen = False
def set_bounds(self, left, right, bottom, top):
assert right > left and top > bottom
scalex = self.width/(right-left)
scaley = self.height/(top-bottom)
self.transform = Transform(
translation=(-left*scalex, -bottom*scaley),
scale=(scalex, scaley))
def add_geom(self, geom):
self.geoms.append(geom)
def add_onetime(self, geom):
self.onetime_geoms.append(geom)
def render(self, return_rgb_array=False):
glClearColor(1,1,1,1)
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
self.transform.enable()
for geom in self.geoms:
geom.render()
for geom in self.onetime_geoms:
geom.render()
self.transform.disable()
arr = None
if return_rgb_array:
buffer = pyglet.image.get_buffer_manager().get_color_buffer()
image_data = buffer.get_image_data()
            arr = np.frombuffer(image_data.data, dtype=np.uint8)  # np.fromstring is deprecated for binary data
# In https://github.com/openai/gym-http-api/issues/2, we
# discovered that someone using Xmonad on Arch was having
# a window of size 598 x 398, though a 600 x 400 window
# was requested. (Guess Xmonad was preserving a pixel for
# the boundary.) So we use the buffer height/width rather
# than the requested one.
arr = arr.reshape(buffer.height, buffer.width, 4)
arr = arr[::-1,:,0:3]
self.window.flip()
self.onetime_geoms = []
return arr if return_rgb_array else self.isopen
# Convenience
def draw_circle(self, radius=10, res=30, filled=True, **attrs):
geom = make_circle(radius=radius, res=res, filled=filled)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def draw_polygon(self, v, filled=True, **attrs):
geom = make_polygon(v=v, filled=filled)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def draw_polyline(self, v, **attrs):
geom = make_polyline(v=v)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def draw_line(self, start, end, **attrs):
geom = Line(start, end)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def get_array(self):
self.window.flip()
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
self.window.flip()
        arr = np.frombuffer(image_data.data, dtype=np.uint8)
arr = arr.reshape(self.height, self.width, 4)
return arr[::-1,:,0:3]
def __del__(self):
self.close()
def _add_attrs(geom, attrs):
if "color" in attrs:
geom.set_color(*attrs["color"])
if "linewidth" in attrs:
geom.set_linewidth(attrs["linewidth"])
class Geom(object):
def __init__(self):
self._color=Color((0, 0, 0, 1.0))
self.attrs = [self._color]
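    # attrs wrap render1(): they are enabled in reverse order, so the most
    # recently added attr (e.g. a parent Transform) is applied outermost.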
def render(self):
for attr in reversed(self.attrs):
attr.enable()
self.render1()
for attr in self.attrs:
attr.disable()
def render1(self):
raise NotImplementedError
def add_attr(self, attr):
self.attrs.append(attr)
def set_color(self, r, g, b):
self._color.vec4 = (r, g, b, 1)
class Attr(object):
def enable(self):
raise NotImplementedError
def disable(self):
pass
class Transform(Attr):
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
self.set_translation(*translation)
self.set_rotation(rotation)
self.set_scale(*scale)
def enable(self):
glPushMatrix()
        glTranslatef(self.translation[0], self.translation[1], 0)  # translate to the GL location
glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
glScalef(self.scale[0], self.scale[1], 1)
def disable(self):
glPopMatrix()
def set_translation(self, newx, newy):
self.translation = (float(newx), float(newy))
def set_rotation(self, new):
self.rotation = float(new)
def set_scale(self, newx, newy):
self.scale = (float(newx), float(newy))
class Color(Attr):
def __init__(self, vec4):
self.vec4 = vec4
def enable(self):
glColor4f(*self.vec4)
class LineStyle(Attr):
def __init__(self, style):
self.style = style
def enable(self):
glEnable(GL_LINE_STIPPLE)
glLineStipple(1, self.style)
def disable(self):
glDisable(GL_LINE_STIPPLE)
class LineWidth(Attr):
def __init__(self, stroke):
self.stroke = stroke
def enable(self):
glLineWidth(self.stroke)
class Point(Geom):
def __init__(self):
Geom.__init__(self)
def render1(self):
glBegin(GL_POINTS) # draw point
glVertex3f(0.0, 0.0, 0.0)
glEnd()
class FilledPolygon(Geom):
def __init__(self, v):
Geom.__init__(self)
self.v = v
def render1(self):
if len(self.v) == 4 : glBegin(GL_QUADS)
elif len(self.v) > 4 : glBegin(GL_POLYGON)
else: glBegin(GL_TRIANGLES)
for p in self.v:
glVertex3f(p[0], p[1],0) # draw each vertex
glEnd()
def make_circle(radius=10, res=30, filled=True):
points = []
for i in range(res):
ang = 2*math.pi*i / res
points.append((math.cos(ang)*radius, math.sin(ang)*radius))
if filled:
return FilledPolygon(points)
else:
return PolyLine(points, True)
def make_polygon(v, filled=True):
if filled: return FilledPolygon(v)
else: return PolyLine(v, True)
def make_polyline(v):
return PolyLine(v, False)
def make_capsule(length, width):
l, r, t, b = 0, length, width/2, -width/2
box = make_polygon([(l,b), (l,t), (r,t), (r,b)])
circ0 = make_circle(width/2)
circ1 = make_circle(width/2)
circ1.add_attr(Transform(translation=(length, 0)))
geom = Compound([box, circ0, circ1])
return geom
class Compound(Geom):
def __init__(self, gs):
Geom.__init__(self)
self.gs = gs
for g in self.gs:
g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
def render1(self):
for g in self.gs:
g.render()
class PolyLine(Geom):
def __init__(self, v, close):
Geom.__init__(self)
self.v = v
self.close = close
self.linewidth = LineWidth(1)
self.add_attr(self.linewidth)
def render1(self):
glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
for p in self.v:
glVertex3f(p[0], p[1],0) # draw each vertex
glEnd()
def set_linewidth(self, x):
self.linewidth.stroke = x
class Line(Geom):
def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
Geom.__init__(self)
self.start = start
self.end = end
self.linewidth = LineWidth(1)
self.add_attr(self.linewidth)
def render1(self):
glBegin(GL_LINES)
glVertex2f(*self.start)
glVertex2f(*self.end)
glEnd()
class Image(Geom):
def __init__(self, fname, width, height):
Geom.__init__(self)
self.width = width
self.height = height
img = pyglet.image.load(fname)
self.img = img
self.flip = False
def render1(self):
self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height)
# ================================================================
class SimpleImageViewer(object):
def __init__(self, display=None, maxwidth=500):
self.window = None
self.isopen = False
self.display = display
self.maxwidth = maxwidth
def imshow(self, arr):
if self.window is None:
height, width, _channels = arr.shape
if width > self.maxwidth:
scale = self.maxwidth / width
width = int(scale * width)
height = int(scale * height)
self.window = pyglet.window.Window(width=width, height=height,
display=self.display, vsync=False, resizable=True)
self.width = width
self.height = height
self.isopen = True
@self.window.event
def on_resize(width, height):
self.width = width
self.height = height
@self.window.event
def on_close():
self.isopen = False
        assert len(arr.shape) == 3, "You passed in an image with the wrong number of dimensions"
image = pyglet.image.ImageData(arr.shape[1], arr.shape[0],
'RGB', arr.tobytes(), pitch=arr.shape[1]*-3)
gl.glTexParameteri(gl.GL_TEXTURE_2D,
gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
texture = image.get_texture()
texture.width = self.width
texture.height = self.height
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
texture.blit(0, 0) # draw
self.window.flip()
def close(self):
if self.isopen and sys.meta_path:
# ^^^ check sys.meta_path to avoid 'ImportError: sys.meta_path is None, Python is likely shutting down'
self.window.close()
self.isopen = False
def __del__(self):
self.close()
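
# Hypothetical smoke test (not part of the original file); requires a real or
# virtual display (e.g. xvfb-run), since pyglet opens a window.
if __name__ == '__main__':
    viewer = Viewer(400, 300)
    viewer.set_bounds(-2, 2, -1.5, 1.5)            # world coords -> pixels
    circle = make_circle(radius=0.5)
    circle.set_color(0.8, 0.3, 0.3)
    circle.add_attr(Transform(translation=(1.0, 0.0)))
    viewer.add_geom(circle)                        # persistent geometry
    frame = viewer.render(return_rgb_array=True)   # (H, W, 3) uint8 array
    viewer.close()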

View File

@ -0,0 +1,16 @@
from gym.envs.mujoco.mujoco_env import MujocoEnv
# ^^^^^ so that user gets the correct error
# message if mujoco is not installed correctly
from gym.envs.mujoco.ant import AntEnv
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
from gym.envs.mujoco.hopper import HopperEnv
from gym.envs.mujoco.walker2d import Walker2dEnv
from gym.envs.mujoco.humanoid import HumanoidEnv
from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
from gym.envs.mujoco.reacher import ReacherEnv
from gym.envs.mujoco.swimmer import SwimmerEnv
from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
from gym.envs.mujoco.pusher import PusherEnv
from gym.envs.mujoco.thrower import ThrowerEnv
from gym.envs.mujoco.striker import StrikerEnv

View File

@ -0,0 +1,45 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
utils.EzPickle.__init__(self)
def step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore)/self.dt
ctrl_cost = .5 * np.square(a).sum()
contact_cost = 0.5 * 1e-3 * np.sum(
np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
notdone = np.isfinite(state).all() \
and state[2] >= 0.2 and state[2] <= 1.0
done = not notdone
ob = self._get_obs()
return ob, reward, done, dict(
reward_forward=forward_reward,
reward_ctrl=-ctrl_cost,
reward_contact=-contact_cost,
reward_survive=survive_reward)
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[2:],
self.sim.data.qvel.flat,
np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
])
def reset_model(self):
qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
self.set_state(qpos, qvel)
return self._get_obs()
def viewer_setup(self):
self.viewer.cam.distance = self.model.stat.extent * 0.5
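
# Hypothetical sketch (not part of the original file), assuming mujoco-py and
# the ant.xml asset are installed: step() returns the shaped reward together
# with its decomposition, so the four info entries sum back to the reward.
if __name__ == '__main__':
    env = AntEnv()
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    parts = ['reward_forward', 'reward_ctrl', 'reward_contact', 'reward_survive']
    assert abs(reward - sum(info[k] for k in parts)) < 1e-8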

View File

@ -0,0 +1,81 @@
<mujoco model="ant">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.01"/>
<custom>
<numeric data="0.0 0.0 0.55 1.0 0.0 0.0 0.0 0.0 1.0 0.0 -1.0 0.0 -1.0 0.0 1.0" name="init_qpos"/>
</custom>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="0" condim="3" density="5.0" friction="1 0.5 0.5" margin="0.01" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0.75">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<geom name="torso_geom" pos="0 0 0" size="0.25" type="sphere"/>
<joint armature="0" damping="0" limited="false" margin="0.01" name="root" pos="0 0 0" type="free"/>
<body name="front_left_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="aux_1_geom" size="0.08" type="capsule"/>
<body name="aux_1" pos="0.2 0.2 0">
<joint axis="0 0 1" name="hip_1" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 0.2 0.0" name="left_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 0.2 0">
<joint axis="-1 1 0" name="ankle_1" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 0.4 0.0" name="left_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="front_right_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="aux_2_geom" size="0.08" type="capsule"/>
<body name="aux_2" pos="-0.2 0.2 0">
<joint axis="0 0 1" name="hip_2" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 0.2 0.0" name="right_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 0.2 0">
<joint axis="1 1 0" name="ankle_2" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 0.4 0.0" name="right_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="aux_3_geom" size="0.08" type="capsule"/>
<body name="aux_3" pos="-0.2 -0.2 0">
<joint axis="0 0 1" name="hip_3" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.2 -0.2 0.0" name="back_leg_geom" size="0.08" type="capsule"/>
<body pos="-0.2 -0.2 0">
<joint axis="-1 1 0" name="ankle_3" pos="0.0 0.0 0.0" range="-70 -30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 -0.4 -0.4 0.0" name="third_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
<body name="right_back_leg" pos="0 0 0">
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="aux_4_geom" size="0.08" type="capsule"/>
<body name="aux_4" pos="0.2 -0.2 0">
<joint axis="0 0 1" name="hip_4" pos="0.0 0.0 0.0" range="-30 30" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.2 -0.2 0.0" name="rightback_leg_geom" size="0.08" type="capsule"/>
<body pos="0.2 -0.2 0">
<joint axis="1 1 0" name="ankle_4" pos="0.0 0.0 0.0" range="30 70" type="hinge"/>
<geom fromto="0.0 0.0 0.0 0.4 -0.4 0.0" name="fourth_ankle_geom" size="0.08" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_4" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_1" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_2" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="hip_3" gear="150"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="ankle_3" gear="150"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,96 @@
<!-- Cheetah Model
The state space is populated with joints in the order that they are
defined in this file. The actuators also operate on joints.
State-Space (name/joint/parameter):
- rootx slider position (m)
- rootz slider position (m)
- rooty hinge angle (rad)
- bthigh hinge angle (rad)
- bshin hinge angle (rad)
- bfoot hinge angle (rad)
- fthigh hinge angle (rad)
- fshin hinge angle (rad)
- ffoot hinge angle (rad)
- rootx slider velocity (m/s)
- rootz slider velocity (m/s)
- rooty hinge angular velocity (rad/s)
- bthigh hinge angular velocity (rad/s)
- bshin hinge angular velocity (rad/s)
- bfoot hinge angular velocity (rad/s)
- fthigh hinge angular velocity (rad/s)
- fshin hinge angular velocity (rad/s)
- ffoot hinge angular velocity (rad/s)
Actuators (name/actuator/parameter):
- bthigh hinge torque (N m)
- bshin hinge torque (N m)
- bfoot hinge torque (N m)
- fthigh hinge torque (N m)
- fshin hinge torque (N m)
- ffoot hinge torque (N m)
-->
<mujoco model="cheetah">
<compiler angle="radian" coordinate="local" inertiafromgeom="true" settotalmass="14"/>
<default>
<joint armature=".1" damping=".01" limited="true" solimplimit="0 .8 .03" solreflimit=".02 1" stiffness="8"/>
<geom conaffinity="0" condim="3" contype="1" friction=".4 .1 .1" rgba="0.8 0.6 .4 1" solimp="0.0 0.8 0.01" solref="0.02 1"/>
<motor ctrllimited="true" ctrlrange="-1 1"/>
</default>
<size nstack="300000" nuser_geom="1"/>
<option gravity="0 0 -9.81" timestep="0.01"/>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 .7">
<camera name="track" mode="trackcom" pos="0 -3 0.3" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 0" stiffness="0" type="hinge"/>
<geom fromto="-.5 0 0 .5 0 0" name="torso" size="0.046" type="capsule"/>
<geom axisangle="0 1 0 .87" name="head" pos=".6 0 .1" size="0.046 .15" type="capsule"/>
<!-- <site name='tip' pos='.15 0 .11'/>-->
<body name="bthigh" pos="-.5 0 0">
<joint axis="0 1 0" damping="6" name="bthigh" pos="0 0 0" range="-.52 1.05" stiffness="240" type="hinge"/>
<geom axisangle="0 1 0 -3.8" name="bthigh" pos=".1 0 -.13" size="0.046 .145" type="capsule"/>
<body name="bshin" pos=".16 0 -.25">
<joint axis="0 1 0" damping="4.5" name="bshin" pos="0 0 0" range="-.785 .785" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 -2.03" name="bshin" pos="-.14 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .15" type="capsule"/>
<body name="bfoot" pos="-.28 0 -.14">
<joint axis="0 1 0" damping="3" name="bfoot" pos="0 0 0" range="-.4 .785" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.27" name="bfoot" pos=".03 0 -.097" rgba="0.9 0.6 0.6 1" size="0.046 .094" type="capsule"/>
</body>
</body>
</body>
<body name="fthigh" pos=".5 0 0">
<joint axis="0 1 0" damping="4.5" name="fthigh" pos="0 0 0" range="-1 .7" stiffness="180" type="hinge"/>
<geom axisangle="0 1 0 .52" name="fthigh" pos="-.07 0 -.12" size="0.046 .133" type="capsule"/>
<body name="fshin" pos="-.14 0 -.24">
<joint axis="0 1 0" damping="3" name="fshin" pos="0 0 0" range="-1.2 .87" stiffness="120" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="fshin" pos=".065 0 -.09" rgba="0.9 0.6 0.6 1" size="0.046 .106" type="capsule"/>
<body name="ffoot" pos=".13 0 -.18">
<joint axis="0 1 0" damping="1.5" name="ffoot" pos="0 0 0" range="-.5 .5" stiffness="60" type="hinge"/>
<geom axisangle="0 1 0 -.6" name="ffoot" pos=".045 0 -.07" rgba="0.9 0.6 0.6 1" size="0.046 .07" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor gear="120" joint="bthigh" name="bthigh"/>
<motor gear="90" joint="bshin" name="bshin"/>
<motor gear="60" joint="bfoot" name="bfoot"/>
<motor gear="120" joint="fthigh" name="fthigh"/>
<motor gear="60" joint="fshin" name="fshin"/>
<motor gear="30" joint="ffoot" name="ffoot"/>
</actuator>
</mujoco>
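
A hypothetical helper (not part of the model) that labels a raw state vector
using the joint ordering documented in the header comment above:

CHEETAH_JOINTS = ['rootx', 'rootz', 'rooty', 'bthigh', 'bshin', 'bfoot',
                  'fthigh', 'fshin', 'ffoot']

def label_cheetah_state(state):
    # state: length-18 vector of joint positions followed by joint velocities
    pos = dict(zip(CHEETAH_JOINTS, state[:9]))
    vel = {name + '_vel': v for name, v in zip(CHEETAH_JOINTS, state[9:])}
    return {**pos, **vel}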

View File

@ -0,0 +1,48 @@
<mujoco model="hopper">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<visual>
<map znear="0.02"/>
</visual>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.13/2 0 0.1">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,121 @@
<mujoco model="humanoid">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
<!-- <flags solverstat="enable" energy="enable"/>-->
</option>
<size nkey="5" nuser_geom="1"/>
<visual>
<map fogend="5" fogstart="3"/>
</visual>
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
<body name="torso" pos="0 0 1.4">
<camera name="track" mode="trackcom" pos="0 -4 0" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
<geom name="head" pos="0 0 .19" size=".09" type="sphere" user="258"/>
<geom fromto="-.01 -.06 -.12 -.01 .06 -.12" name="uwaist" size="0.06" type="capsule"/>
<body name="lwaist" pos="-.01 0 -0.260" quat="1.000 0 -0.002 0">
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
<body name="pelvis" pos="0 0 -0.165" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
<body name="right_thigh" pos="0 -0.1 -0.04">
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 0.01 -.34" name="right_thigh1" size="0.06" type="capsule"/>
<body name="right_shin" pos="0 0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="right_shin1" size="0.049" type="capsule"/>
<body name="right_foot" pos="0 0 -0.45">
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
</body>
</body>
</body>
<body name="left_thigh" pos="0 0.1 -0.04">
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0 -0.01 -.34" name="left_thigh1" size="0.06" type="capsule"/>
<body name="left_shin" pos="0 -0.01 -0.403">
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 0 0 -.3" name="left_shin1" size="0.049" type="capsule"/>
<body name="left_foot" pos="0 0 -0.45">
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
<camera pos="0 0 0"/>
</body>
</body>
<body name="left_upper_arm" pos="0 0.17 0.06">
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
</body>
</body>
</body>
</worldbody>
<tendon>
<fixed name="left_hipknee">
<joint coef="-1" joint="left_hip_y"/>
<joint coef="1" joint="left_knee"/>
</fixed>
<fixed name="right_hipknee">
<joint coef="-1" joint="right_hip_y"/>
<joint coef="1" joint="right_knee"/>
</fixed>
</tendon>
<actuator>
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
<motor gear="200" joint="right_knee" name="right_knee"/>
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
<motor gear="200" joint="left_knee" name="left_knee"/>
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
<motor gear="25" joint="right_elbow" name="right_elbow"/>
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
<motor gear="25" joint="left_elbow" name="left_elbow"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,121 @@
<mujoco model="humanoidstandup">
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" iterations="50" solver="PGS" timestep="0.003">
<!-- <flags solverstat="enable" energy="enable"/>-->
</option>
<size nkey="5" nuser_geom="1"/>
<visual>
<map fogend="5" fogstart="3"/>
</visual>
<asset>
<texture builtin="gradient" height="100" rgb1=".4 .5 .6" rgb2="0 0 0" type="skybox" width="100"/>
<!-- <texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>-->
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom condim="3" friction="1 .1 .1" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 0.125" type="plane"/>
<!-- <geom condim="3" material="MatPlane" name="floor" pos="0 0 0" size="10 10 0.125" type="plane"/>-->
<body name="torso" pos="0 0 .105">
<camera name="track" mode="trackcom" pos="0 -3 .5" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" damping="0" limited="false" name="root" pos="0 0 0" stiffness="0" type="free"/>
<geom fromto="0 -.07 0 0 .07 0" name="torso1" size="0.07" type="capsule"/>
<geom name="head" pos="-.15 0 0" size=".09" type="sphere" user="258"/>
<geom fromto=".11 -.06 0 .11 .06 0" name="uwaist" size="0.06" type="capsule"/>
<body name="lwaist" pos=".21 0 0" quat="1.000 0 -0.002 0">
<geom fromto="0 -.06 0 0 .06 0" name="lwaist" size="0.06" type="capsule"/>
<joint armature="0.02" axis="0 0 1" damping="5" name="abdomen_z" pos="0 0 0.065" range="-45 45" stiffness="20" type="hinge"/>
<joint armature="0.02" axis="0 1 0" damping="5" name="abdomen_y" pos="0 0 0.065" range="-75 30" stiffness="10" type="hinge"/>
<body name="pelvis" pos="0.165 0 0" quat="1.000 0 -0.002 0">
<joint armature="0.02" axis="1 0 0" damping="5" name="abdomen_x" pos="0 0 0.1" range="-35 35" stiffness="10" type="hinge"/>
<geom fromto="-.02 -.07 0 -.02 .07 0" name="butt" size="0.09" type="capsule"/>
<body name="right_thigh" pos="0 -0.1 0">
<joint armature="0.01" axis="1 0 0" damping="5" name="right_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 1" damping="5" name="right_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.0080" axis="0 1 0" damping="5" name="right_hip_y" pos="0 0 0" range="-110 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0.34 0.01 0" name="right_thigh1" size="0.06" type="capsule"/>
<body name="right_shin" pos="0.403 0.01 0">
<joint armature="0.0060" axis="0 -1 0" name="right_knee" pos="0 0 .02" range="-160 -2" type="hinge"/>
<geom fromto="0 0 0 0.3 0 0" name="right_shin1" size="0.049" type="capsule"/>
<body name="right_foot" pos="0.35 0 -.10">
<geom name="right_foot" pos="0 0 0.1" size="0.075" type="sphere" user="0"/>
</body>
</body>
</body>
<body name="left_thigh" pos="0 0.1 0">
<joint armature="0.01" axis="-1 0 0" damping="5" name="left_hip_x" pos="0 0 0" range="-25 5" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 0 -1" damping="5" name="left_hip_z" pos="0 0 0" range="-60 35" stiffness="10" type="hinge"/>
<joint armature="0.01" axis="0 1 0" damping="5" name="left_hip_y" pos="0 0 0" range="-120 20" stiffness="20" type="hinge"/>
<geom fromto="0 0 0 0.34 -0.01 0" name="left_thigh1" size="0.06" type="capsule"/>
<body name="left_shin" pos="0.403 -0.01 0">
<joint armature="0.0060" axis="0 -1 0" name="left_knee" pos="0 0 .02" range="-160 -2" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 0.3 0 0" name="left_shin1" size="0.049" type="capsule"/>
<body name="left_foot" pos="0.35 0 -.1">
<geom name="left_foot" type="sphere" size="0.075" pos="0 0 0.1" user="0" />
</body>
</body>
</body>
</body>
</body>
<body name="right_upper_arm" pos="0 -0.17 0.06">
<joint armature="0.0068" axis="2 1 1" name="right_shoulder1" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 -1 1" name="right_shoulder2" pos="0 0 0" range="-85 60" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 -.16 -.16" name="right_uarm1" size="0.04 0.16" type="capsule"/>
<body name="right_lower_arm" pos=".18 -.18 -.18">
<joint armature="0.0028" axis="0 -1 1" name="right_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 0.01 0.01 .17 .17 .17" name="right_larm" size="0.031" type="capsule"/>
<geom name="right_hand" pos=".18 .18 .18" size="0.04" type="sphere"/>
<camera pos="0 0 0"/>
</body>
</body>
<body name="left_upper_arm" pos="0 0.17 0.06">
<joint armature="0.0068" axis="2 -1 1" name="left_shoulder1" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<joint armature="0.0051" axis="0 1 1" name="left_shoulder2" pos="0 0 0" range="-60 85" stiffness="1" type="hinge"/>
<geom fromto="0 0 0 .16 .16 -.16" name="left_uarm1" size="0.04 0.16" type="capsule"/>
<body name="left_lower_arm" pos=".18 .18 -.18">
<joint armature="0.0028" axis="0 -1 -1" name="left_elbow" pos="0 0 0" range="-90 50" stiffness="0" type="hinge"/>
<geom fromto="0.01 -0.01 0.01 .17 -.17 .17" name="left_larm" size="0.031" type="capsule"/>
<geom name="left_hand" pos=".18 -.18 .18" size="0.04" type="sphere"/>
</body>
</body>
</body>
</worldbody>
<tendon>
<fixed name="left_hipknee">
<joint coef="-1" joint="left_hip_y"/>
<joint coef="1" joint="left_knee"/>
</fixed>
<fixed name="right_hipknee">
<joint coef="-1" joint="right_hip_y"/>
<joint coef="1" joint="right_knee"/>
</fixed>
</tendon>
<actuator>
<motor gear="100" joint="abdomen_y" name="abdomen_y"/>
<motor gear="100" joint="abdomen_z" name="abdomen_z"/>
<motor gear="100" joint="abdomen_x" name="abdomen_x"/>
<motor gear="100" joint="right_hip_x" name="right_hip_x"/>
<motor gear="100" joint="right_hip_z" name="right_hip_z"/>
<motor gear="300" joint="right_hip_y" name="right_hip_y"/>
<motor gear="200" joint="right_knee" name="right_knee"/>
<motor gear="100" joint="left_hip_x" name="left_hip_x"/>
<motor gear="100" joint="left_hip_z" name="left_hip_z"/>
<motor gear="300" joint="left_hip_y" name="left_hip_y"/>
<motor gear="200" joint="left_knee" name="left_knee"/>
<motor gear="25" joint="right_shoulder1" name="right_shoulder1"/>
<motor gear="25" joint="right_shoulder2" name="right_shoulder2"/>
<motor gear="25" joint="right_elbow" name="right_elbow"/>
<motor gear="25" joint="left_shoulder1" name="left_shoulder1"/>
<motor gear="25" joint="left_shoulder2" name="left_shoulder2"/>
<motor gear="25" joint="left_elbow" name="left_elbow"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,47 @@
<!-- Cartpole Model
The state space is populated with joints in the order that they are
defined in this file. The actuators also operate on joints.
State-Space (name/joint/parameter):
- cart slider position (m)
- pole hinge angle (rad)
- cart slider velocity (m/s)
- pole hinge angular velocity (rad/s)
Actuators (name/actuator/parameter):
- cart motor force x (N)
-->
<mujoco model="cartpole">
<compiler coordinate="local" inertiafromgeom="true"/>
<custom>
<numeric data="2" name="frame_skip"/>
</custom>
<default>
<joint damping="0.05"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
</default>
<option gravity="1e-5 0 -9.81" integrator="RK4" timestep="0.01"/>
<size nstack="3000"/>
<worldbody>
<geom name="floor" pos="0 0 -3.0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
<body name="cart" pos="0 0 0">
<joint axis="1 0 0" limited="true" margin="0.01" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
<body name="pole" pos="0 0 0">
<joint axis="0 1 0" name="hinge" pos="0 0 0" type="hinge"/>
<geom fromto="0 0 0 0 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
<body name="pole2" pos="0 0 0.6">
<joint axis="0 1 0" name="hinge2" pos="0 0 0" type="hinge"/>
<geom fromto="0 0 0 0 0 0.6" name="cpole2" rgba="0 0.7 0.7 1" size="0.045 0.3" type="capsule"/>
<site name="tip" pos="0 0 .6" size="0.01 0.01"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1 1" gear="500" joint="slider" name="slide"/>
</actuator>
</mujoco>
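
A worked reading of the single actuator above (an interpretation, not stated
in the file): controls are clipped to ctrlrange [-1, 1] and scaled by the
gear, so the peak force applied to the slider joint is 1.0 * 500 = 500 force
units.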

View File

@ -0,0 +1,27 @@
<mujoco model="inverted pendulum">
<compiler inertiafromgeom="true"/>
<default>
<joint armature="0" damping="1" limited="true"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
<tendon/>
<motor ctrlrange="-3 3"/>
</default>
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.02"/>
<size nstack="3000"/>
<worldbody>
<!--geom name="ground" type="plane" pos="0 0 0" /-->
<geom name="rail" pos="0 0 0" quat="0.707 0 0.707 0" rgba="0.3 0.3 0.7 1" size="0.02 1" type="capsule"/>
<body name="cart" pos="0 0 0">
<joint axis="1 0 0" limited="true" name="slider" pos="0 0 0" range="-1 1" type="slide"/>
<geom name="cart" pos="0 0 0" quat="0.707 0 0.707 0" size="0.1 0.1" type="capsule"/>
<body name="pole" pos="0 0 0">
<joint axis="0 1 0" name="hinge" pos="0 0 0" range="-90 90" type="hinge"/>
<geom fromto="0 0 0 0.001 0 0.6" name="cpole" rgba="0 0.7 0.7 1" size="0.049 0.3" type="capsule"/>
<!-- <body name="pole2" pos="0.001 0 0.6"><joint name="hinge2" type="hinge" pos="0 0 0" axis="0 1 0"/><geom name="cpole2" type="capsule" fromto="0 0 0 0 0 0.6" size="0.05 0.3" rgba="0.7 0 0.7 1"/><site name="tip2" pos="0 0 .6"/></body>-->
</body>
</body>
</worldbody>
<actuator>
<motor gear="100" joint="slider" name="slide"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,31 @@
<mujoco>
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.02"/>
<default>
<joint armature="0" damping="0" limited="false"/>
<geom conaffinity="0" condim="3" density="100" friction="1 0.5 0.5" margin="0" rgba="0.8 0.6 0.4 1"/>
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane"/>
<body name="torso" pos="0 0 0">
<geom name="pointbody" pos="0 0 0.5" size="0.5" type="sphere"/>
<geom name="pointarrow" pos="0.6 0 0.5" size="0.5 0.1 0.1" type="box"/>
<joint axis="1 0 0" name="ballx" pos="0 0 0" type="slide"/>
<joint axis="0 1 0" name="bally" pos="0 0 0" type="slide"/>
<joint axis="0 0 1" limited="false" name="rot" pos="0 0 0" type="hinge"/>
</body>
</worldbody>
<actuator>
<!-- Those are just dummy actuators for providing ranges -->
<motor ctrllimited="true" ctrlrange="-1 1" joint="ballx"/>
<motor ctrllimited="true" ctrlrange="-0.25 0.25" joint="rot"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,91 @@
<mujoco model="arm3d">
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
<option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler" />
<default>
<joint armature='0.04' damping="1" limited="true"/>
<geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
</default>
<worldbody>
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
<geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" />
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" />
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" />
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" />
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
<body name="r_shoulder_lift_link" pos="0.1 0 0">
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
<body name="r_upper_arm_roll_link" pos="0 0 0">
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
<body name="r_upper_arm_link" pos="0 0 0">
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
<body name="r_elbow_flex_link" pos="0.4 0 0">
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
<body name="r_forearm_roll_link" pos="0 0 0">
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
<body name="r_forearm_link" pos="0 0 0">
<geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
<body name="r_wrist_flex_link" pos="0.321 0 0">
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
<body name="r_wrist_roll_link" pos="0 0 0">
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
<body name="tips_arm" pos="0 0 0">
<geom name="tip_arml" type="sphere" pos="0.1 -0.1 0." size="0.01" />
<geom name="tip_armr" type="sphere" pos="0.1 0.1 0." size="0.01" />
</body>
<geom type="capsule" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" contype="1" conaffinity="1" />
<geom type="capsule" fromto="0 -0.1 0. 0.1 -0.1 0" size="0.02" contype="1" conaffinity="1" />
<geom type="capsule" fromto="0 +0.1 0. 0.1 +0.1 0." size="0.02" contype="1" conaffinity="1" />
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<!--<body name="object" pos="0.55 -0.3 -0.275" >-->
<body name="object" pos="0.45 -0.05 -0.275" >
<geom rgba="1 1 1 0" type="sphere" size="0.05 0.05 0.05" density="0.00001" conaffinity="0"/>
<geom rgba="1 1 1 1" type="cylinder" size="0.05 0.05 0.05" density="0.00001" contype="1" conaffinity="0"/>
<joint name="obj_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="obj_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
<body name="goal" pos="0.45 -0.05 -0.3230">
<geom rgba="1 0 0 1" type="cylinder" size="0.08 0.001 0.1" density='0.00001' contype="0" conaffinity="0"/>
<joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
</worldbody>
<actuator>
<motor joint="r_shoulder_pan_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_shoulder_lift_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_upper_arm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_elbow_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_forearm_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_wrist_flex_joint" ctrlrange="-2.0 2.0" ctrllimited="true" />
<motor joint="r_wrist_roll_joint" ctrlrange="-2.0 2.0" ctrllimited="true"/>
</actuator>
</mujoco>
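A quick way to sanity-check an MJCF asset like the one above is to load and step it directly with mujoco_py, the same bindings the environment classes later in this commit use. A minimal sketch, assuming mujoco_py is installed and the XML above is saved as 'pusher.xml':

import mujoco_py

# Load the model, build a simulation, and step it with zero torques.
model = mujoco_py.load_model_from_path('pusher.xml')
sim = mujoco_py.MjSim(model)
for _ in range(100):
    sim.data.ctrl[:] = 0.0  # zero control on all seven motors
    sim.step()
print(sim.data.qpos)  # joint positions after 100 passive steps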

View File

@ -0,0 +1,39 @@
<mujoco model="reacher">
<compiler angle="radian" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom contype="0" friction="1 0.1 0.1" rgba="0.7 0.7 0 1"/>
</default>
<option gravity="0 0 -9.81" integrator="RK4" timestep="0.01"/>
<worldbody>
<!-- Arena -->
<geom conaffinity="0" contype="0" name="ground" pos="0 0 0" rgba="0.9 0.9 0.9 1" size="1 1 10" type="plane"/>
<geom conaffinity="0" fromto="-.3 -.3 .01 .3 -.3 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto=" .3 -.3 .01 .3 .3 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto="-.3 .3 .01 .3 .3 .01" name="sideN" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<geom conaffinity="0" fromto="-.3 -.3 .01 -.3 .3 .01" name="sideW" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
<!-- Arm -->
<geom conaffinity="0" contype="0" fromto="0 0 0 0 0 0.02" name="root" rgba="0.9 0.4 0.6 1" size=".011" type="cylinder"/>
<body name="body0" pos="0 0 .01">
<geom fromto="0 0 0 0.1 0 0" name="link0" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
<joint axis="0 0 1" limited="false" name="joint0" pos="0 0 0" type="hinge"/>
<body name="body1" pos="0.1 0 0">
<joint axis="0 0 1" limited="true" name="joint1" pos="0 0 0" range="-3.0 3.0" type="hinge"/>
<geom fromto="0 0 0 0.1 0 0" name="link1" rgba="0.0 0.4 0.6 1" size=".01" type="capsule"/>
<body name="fingertip" pos="0.11 0 0">
<geom contype="0" name="fingertip" pos="0 0 0" rgba="0.0 0.8 0.6 1" size=".01" type="sphere"/>
</body>
</body>
</body>
<!-- Target -->
<body name="target" pos=".1 -.1 .01">
<joint armature="0" axis="1 0 0" damping="0" limited="true" name="target_x" pos="0 0 0" range="-.27 .27" ref=".1" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="true" name="target_y" pos="0 0 0" range="-.27 .27" ref="-.1" stiffness="0" type="slide"/>
<geom conaffinity="0" contype="0" name="target" pos="0 0 0" rgba="0.9 0.2 0.2 1" size=".009" type="sphere"/>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint0"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="joint1"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,101 @@
<mujoco model="arm3d">
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
<option timestep="0.01" gravity="0 0 0" iterations="20" integrator="Euler"/>
<default>
<joint armature='0.04' damping="1" limited="true"/>
<geom friction=".0 .0 .0" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
</default>
<worldbody>
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
<geom name="table" type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05"/>
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05"/>
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03"/>
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03"/>
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" />
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-2.2854 1.714602" damping="1.0" />
<body name="r_shoulder_lift_link" pos="0.1 0 0">
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" />
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 1.3963" damping="1.0" />
<body name="r_upper_arm_roll_link" pos="0 0 0">
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="0.1" />
<body name="r_upper_arm_link" pos="0 0 0">
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" />
<body name="r_elbow_flex_link" pos="0.4 0 0">
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" />
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-2.3213 0" damping="0.1" />
<body name="r_forearm_roll_link" pos="0 0 0">
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" />
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping=".1" range="-1.5 1.5"/>
<body name="r_forearm_link" pos="0 0 0">
<geom name="fa" type="capsule" fromto="0 0 0 0.291 0 0" size="0.05" />
<body name="r_wrist_flex_link" pos="0.321 0 0">
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" />
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-1.094 0" damping=".1" />
<body name="r_wrist_roll_link" pos="0 0 0">
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="1 0 0" damping="0.1" range="-1.5 1.5"/>
<body name="tips_arm" pos="0 0 0">
<geom conaffinity="1" contype="1" name="tip_arml" pos="0.017 0 0" size="0.003 0.12 0.05" type="box" />
</body>
<geom conaffinity="1" contype="1" fromto="0 -0.1 0. 0.0 +0.1 0" size="0.02" type="capsule" />
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<body name="object" pos="0 0 -0.270" >
<geom type="sphere" rgba="1 1 1 1" pos="0 0 0" size="0.05 0.05 0.05" contype="1" conaffinity="0"/>
<joint name="obj_slidey" armature="0.1" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="0.5"/>
<joint name="obj_slidex" armature="0.1" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="0.5"/>
</body>
<body name="goal" pos="0.0 0.0 -0.3230">
<geom rgba="1. 1. 1. 0" pos="0 0 0" type="box" size="0.01 0.01 0.01" contype="0" conaffinity="0"/>
<body pos="0 0 0">
<geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
</body>
<body name="coaster" pos="0 0 0">
<geom rgba="1. 1. 1. 1" type="cylinder" size="0.08 0.001 0.1" density='1000000' contype="0" conaffinity="0"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 0.785">
<geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 -0.785">
<geom rgba="1. 1. 1. 1" pos="0 0.075 0.04" type="box" size="0.032 0.001 0.04" contype="0" conaffinity="1"/>
</body>
<joint name="goal_free" type="free" pos="0 0 0" limited="false" damping="0"/>
</body>
</worldbody>
<actuator>
<motor joint="r_shoulder_pan_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_shoulder_lift_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_upper_arm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_elbow_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_forearm_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_wrist_flex_joint" ctrlrange="-3.0 3.0" ctrllimited="true" />
<motor joint="r_wrist_roll_joint" ctrlrange="-3.0 3.0" ctrllimited="true"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,38 @@
<mujoco model="swimmer">
<compiler angle="degree" coordinate="local" inertiafromgeom="true"/>
<option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1"/>
<default>
<geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1"/>
<joint armature='0.1' />
</default>
<asset>
<texture builtin="gradient" height="100" rgb1="1 1 1" rgb2="0 0 0" type="skybox" width="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="30 30" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane"/>
<!-- ================= SWIMMER ================= /-->
<body name="torso" pos="0 0 0">
<geom density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule"/>
<joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide"/>
<joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide"/>
<joint axis="0 0 1" name="rot" pos="0 0 0" type="hinge"/>
<body name="mid" pos="0.5 0 0">
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
<joint axis="0 0 1" limited="true" name="rot2" pos="0 0 0" range="-100 100" type="hinge"/>
<body name="back" pos="-1 0 0">
<geom density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule"/>
<joint axis="0 0 1" limited="true" name="rot3" pos="0 0 0" range="-100 100" type="hinge"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot2"/>
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot3"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,127 @@
<mujoco model="arm3d">
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
<option timestep="0.01" gravity="0 0 -9.81" iterations="20" integrator="Euler"/>
<default>
<joint armature='0.75' damping="1" limited="true"/>
<geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
</default>
<worldbody>
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
<geom type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" density="0.0001"/>
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" density="0.0001"/>
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" density="0.0001"/>
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" density="0.0001"/>
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" density="1"/>
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-0.4854 1.214602" damping="1.0"/>
<body name="r_shoulder_lift_link" pos="0.1 0 0">
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" density="0.0001"/>
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 0.7963" damping="1.0"/>
<body name="r_upper_arm_roll_link" pos="0 0 0">
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="1.0"/>
<body name="r_upper_arm_link" pos="0 0 0">
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" density="0.0001"/>
<body name="r_elbow_flex_link" pos="0.4 0 0">
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" density="0.0001"/>
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.7 0.7" damping="1.0"/>
<body name="r_forearm_roll_link" pos="0 0 0">
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping="1.0" range="-1.5 1.5"/>
<body name="r_forearm_link" pos="0 0 0" axisangle="1 0 0 0.392">
<geom name="fa" type="capsule" fromto="0 0 0 0 0 0.291" size="0.05" density="0.0001"/>
<body name="r_wrist_flex_link" pos="0 0 0.321" axisangle="0 0 1 1.57">
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" density="0.0001"/>
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-1.0 1.0" damping="1.0" />
<body name="r_wrist_roll_link" pos="0 0 0" axisangle="0 1 0 -1.178">
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="0 1 0" damping="1.0" range="0 2.25"/>
<geom type="capsule" fromto="0 -0.05 0 0 0.05 0" size="0.01" contype="1" conaffinity="1" density="0.0001"/>
<body pos="0 0 0" axisangle="0 0 1 0.392">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density=".0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 1.57">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 1.178">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 0.785">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 1.96">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 2.355">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
<body pos="0 0 0" axisangle="0 0 1 2.74">
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
</body>
<body name="goal" pos="0.575 0.5 -0.328">
<geom rgba="1 1 1 1" type="box" pos="0 0 0.005" size="0.075 0.075 0.001" contype="1" conaffinity="1" density="1000"/>
<geom rgba="1 1 1 1" type="box" pos="0.0 0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="box" pos="0.0 -0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="box" pos="0.075 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="box" pos="-0.076 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="capsule" fromto="0.073 0.073 0.0075 0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="capsule" fromto="0.073 -0.073 0.0075 0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="capsule" fromto="-0.073 0.073 0.0075 -0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
<geom rgba="1 1 1 1" type="capsule" fromto="-0.073 -0.073 0.0075 -0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
<joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="1.0"/>
<joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="1.0"/>
</body>
<body name="ball" pos="0.5 -0.8 0.275">
<geom rgba="1. 1. 1. 1" type="sphere" size="0.03 0.03 0.1" density='25' contype="1" conaffinity="1"/>
<joint name="ball_free" type="free" armature='0' damping="0" limited="false"/>
</body>
</worldbody>
<actuator>
<motor joint="r_shoulder_pan_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
<motor joint="r_shoulder_lift_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
<motor joint="r_upper_arm_roll_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
<motor joint="r_elbow_flex_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
<motor joint="r_forearm_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
<motor joint="r_wrist_flex_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
<motor joint="r_wrist_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true"/>
</actuator>
</mujoco>

View File

@ -0,0 +1,62 @@
<mujoco model="walker2d">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="0.01" damping=".1" limited="true"/>
<geom conaffinity="0" condim="3" contype="1" density="1000" friction=".7 .1 .1" rgba="0.8 0.6 .4 1"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="40 40 40" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.2/2 0 0.1">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="0.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
<!-- copied and then replace thigh->thigh_left, leg->leg_left, foot->foot_left -->
<body name="thigh_left" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_left_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_left_geom" rgba=".7 .3 .6 1" size="0.05" type="capsule"/>
<body name="leg_left" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_left_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_left_geom" rgba=".7 .3 .6 1" size="0.04" type="capsule"/>
<body name="foot_left" pos="0.2/2 0 0.1">
<joint axis="0 -1 0" name="foot_left_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="1.9" fromto="-0.0 0 0.1 0.2 0 0.1" name="foot_left_geom" rgba=".7 .3 .6 1" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<!-- <motor joint="torso_joint" ctrlrange="-100.0 100.0" isctrllimited="true"/>-->
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="thigh_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="leg_left_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="100" joint="foot_left_joint"/>
<!-- <motor joint="finger2_rot" ctrlrange="-20.0 20.0" isctrllimited="true"/>-->
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>

View File

@ -0,0 +1,34 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
        utils.EzPickle.__init__(self)

    def step(self, action):
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        xposafter = self.sim.data.qpos[0]
        ob = self._get_obs()
        reward_ctrl = - 0.1 * np.square(action).sum()
        reward_run = (xposafter - xposbefore)/self.dt
        reward = reward_ctrl + reward_run
        done = False
        return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)

    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[1:],
            self.sim.data.qvel.flat,
        ])

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        self.viewer.cam.distance = self.model.stat.extent * 0.5
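For context, a hedged usage sketch: gym releases that ship this file register it under the 'HalfCheetah-v2' ID (an assumption about the installed version), so a random-action rollout looks like this:

import gym

# Run one episode of random actions and accumulate the reward.
env = gym.make('HalfCheetah-v2')  # ID assumed; depends on the gym version
ob = env.reset()
total_reward = 0.0
for _ in range(1000):
    ob, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
    if done:
        break
print('episode return:', total_reward)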

View File

@ -0,0 +1,40 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
        utils.EzPickle.__init__(self)

    def step(self, a):
        posbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        posafter, height, ang = self.sim.data.qpos[0:3]
        alive_bonus = 1.0
        reward = (posafter - posbefore) / self.dt
        reward += alive_bonus
        reward -= 1e-3 * np.square(a).sum()
        s = self.state_vector()
        done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
                    (height > .7) and (abs(ang) < .2))
        ob = self._get_obs()
        return ob, reward, done, {}

    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[1:],
            np.clip(self.sim.data.qvel.flat, -10, 10)
        ])

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 2
        self.viewer.cam.distance = self.model.stat.extent * 0.75
        self.viewer.cam.lookat[2] = 1.15
        self.viewer.cam.elevation = -20

View File

@ -0,0 +1,51 @@
import numpy as np
from gym.envs.mujoco import mujoco_env
from gym import utils


def mass_center(model, sim):
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]


class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'humanoid.xml', 5)
        utils.EzPickle.__init__(self)

    def _get_obs(self):
        data = self.sim.data
        return np.concatenate([data.qpos.flat[2:],
                               data.qvel.flat,
                               data.cinert.flat,
                               data.cvel.flat,
                               data.qfrc_actuator.flat,
                               data.cfrc_ext.flat])

    def step(self, a):
        pos_before = mass_center(self.model, self.sim)
        self.do_simulation(a, self.frame_skip)
        pos_after = mass_center(self.model, self.sim)
        alive_bonus = 5.0
        data = self.sim.data
        lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
        qpos = self.sim.data.qpos
        done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
        return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)

    def reset_model(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
        )
        return self._get_obs()

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 2.0
        self.viewer.cam.elevation = -20

View File

@ -0,0 +1,45 @@
from gym.envs.mujoco import mujoco_env
from gym import utils
import numpy as np


class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
        utils.EzPickle.__init__(self)

    def _get_obs(self):
        data = self.sim.data
        return np.concatenate([data.qpos.flat[2:],
                               data.qvel.flat,
                               data.cinert.flat,
                               data.cvel.flat,
                               data.qfrc_actuator.flat,
                               data.cfrc_ext.flat])

    def step(self, a):
        self.do_simulation(a, self.frame_skip)
        pos_after = self.sim.data.qpos[2]
        data = self.sim.data
        uph_cost = (pos_after - 0) / self.model.opt.timestep
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
        done = bool(False)
        return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)

    def reset_model(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
        )
        return self._get_obs()

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 0.8925
        self.viewer.cam.elevation = -20

View File

@ -0,0 +1,43 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
        utils.EzPickle.__init__(self)

    def step(self, action):
        self.do_simulation(action, self.frame_skip)
        ob = self._get_obs()
        x, _, y = self.sim.data.site_xpos[0]
        dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
        v1, v2 = self.sim.data.qvel[1:3]
        vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
        alive_bonus = 10
        r = alive_bonus - dist_penalty - vel_penalty
        done = bool(y <= 1)
        return ob, r, done, {}

    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos[:1],  # cart x pos
            np.sin(self.sim.data.qpos[1:]),  # link angles
            np.cos(self.sim.data.qpos[1:]),
            np.clip(self.sim.data.qvel, -10, 10),
            np.clip(self.sim.data.qfrc_constraint, -10, 10)
        ]).ravel()

    def reset_model(self):
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
            self.init_qvel + self.np_random.randn(self.model.nv) * .1
        )
        return self._get_obs()

    def viewer_setup(self):
        v = self.viewer
        v.cam.trackbodyid = 0
        v.cam.distance = self.model.stat.extent * 0.5
        v.cam.lookat[2] = 0.12250000000000005  # v.model.stat.center[2]

View File

@ -0,0 +1,30 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)

    def step(self, a):
        reward = 1.0
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
        done = not notdone
        return ob, reward, done, {}

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
        qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()

    def viewer_setup(self):
        v = self.viewer
        v.cam.trackbodyid = 0
        v.cam.distance = self.model.stat.extent

View File

@ -0,0 +1,145 @@
import os

from gym import error, spaces
from gym.utils import seeding
import numpy as np
from os import path
import gym
import six

try:
    import mujoco_py
except ImportError as e:
    raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))

DEFAULT_SIZE = 500


class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments.
    """

    def __init__(self, model_path, frame_skip):
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = mujoco_py.load_model_from_path(fullpath)
        self.sim = mujoco_py.MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self._viewers = {}

        self.metadata = {
            'render.modes': ['human', 'rgb_array', 'depth_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }

        self.init_qpos = self.sim.data.qpos.ravel().copy()
        self.init_qvel = self.sim.data.qvel.ravel().copy()

        observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ----------------------------

    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def viewer_setup(self):
        """
        This method is called when the viewer is initialized.
        Optionally implement this method, if you need to tinker with camera position
        and so forth.
        """
        pass

    # -----------------------------

    def reset(self):
        self.sim.reset()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
        old_state = self.sim.get_state()
        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
                                         old_state.act, old_state.udd_state)
        self.sim.set_state(new_state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        self.sim.data.ctrl[:] = ctrl
        for _ in range(n_frames):
            self.sim.step()

    def render(self, mode='human', width=DEFAULT_SIZE, height=DEFAULT_SIZE):
        if mode == 'rgb_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            data = self._get_viewer(mode).read_pixels(width, height, depth=False)
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        elif mode == 'depth_array':
            self._get_viewer(mode).render(width, height)
            # window size used for old mujoco-py:
            # Extract depth part of the read_pixels() tuple
            data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
            # original image is upside-down, so flip it
            return data[::-1, :]
        elif mode == 'human':
            self._get_viewer(mode).render()

    def close(self):
        if self.viewer is not None:
            # self.viewer.finish()
            self.viewer = None
            self._viewers = {}

    def _get_viewer(self, mode):
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == 'human':
                self.viewer = mujoco_py.MjViewer(self.sim)
            elif mode == 'rgb_array' or mode == 'depth_array':
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
            self.viewer_setup()
            self._viewers[mode] = self.viewer
        return self.viewer

    def get_body_com(self, body_name):
        return self.data.get_body_xpos(body_name)

    def state_vector(self):
        return np.concatenate([
            self.sim.data.qpos.flat,
            self.sim.data.qvel.flat
        ])
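To make the "methods to override" contract above concrete, here is a minimal subclass sketch; MyEnv and 'my_model.xml' are hypothetical placeholders, not part of this commit:

import numpy as np
from gym import utils

class MyEnv(MujocoEnv, utils.EzPickle):
    def __init__(self):
        # NOTE: MujocoEnv.__init__ calls self.step() once during setup,
        # so step() must already work when this runs.
        MujocoEnv.__init__(self, 'my_model.xml', 5)  # hypothetical asset
        utils.EzPickle.__init__(self)

    def step(self, a):
        self.do_simulation(a, self.frame_skip)
        return self._get_obs(), 0.0, False, {}

    def _get_obs(self):
        return np.concatenate([self.sim.data.qpos.flat,
                               self.sim.data.qvel.flat])

    def reset_model(self):
        # The one required override: set the state, return an observation.
        self.set_state(self.init_qpos, self.init_qvel)
        return self._get_obs()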

View File

@ -0,0 +1,57 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env

import mujoco_py


class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)

    def step(self, a):
        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
        vec_2 = self.get_body_com("object") - self.get_body_com("goal")

        reward_near = - np.linalg.norm(vec_1)
        reward_dist = - np.linalg.norm(vec_2)
        reward_ctrl = - np.square(a).sum()
        reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near

        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False
        return ob, reward, done, dict(reward_dist=reward_dist,
                                      reward_ctrl=reward_ctrl)

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = -1
        self.viewer.cam.distance = 4.0

    def reset_model(self):
        qpos = self.init_qpos

        self.goal_pos = np.asarray([0, 0])
        while True:
            self.cylinder_pos = np.concatenate([
                self.np_random.uniform(low=-0.3, high=0, size=1),
                self.np_random.uniform(low=-0.2, high=0.2, size=1)])
            if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
                break

        qpos[-4:-2] = self.cylinder_pos
        qpos[-2:] = self.goal_pos
        qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
                                                       high=0.005, size=self.model.nv)
        qvel[-4:] = 0
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[:7],
            self.sim.data.qvel.flat[:7],
            self.get_body_com("tips_arm"),
            self.get_body_com("object"),
            self.get_body_com("goal"),
        ])

View File

@ -0,0 +1,43 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)

    def step(self, a):
        vec = self.get_body_com("fingertip")-self.get_body_com("target")
        reward_dist = - np.linalg.norm(vec)
        reward_ctrl = - np.square(a).sum()
        reward = reward_dist + reward_ctrl
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False
        return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 0

    def reset_model(self):
        qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
        while True:
            self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
            if np.linalg.norm(self.goal) < 0.2:  # keep the goal within the arm's reach
                break
        qpos[-2:] = self.goal
        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
        qvel[-2:] = 0
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        theta = self.sim.data.qpos.flat[:2]
        return np.concatenate([
            np.cos(theta),
            np.sin(theta),
            self.sim.data.qpos.flat[2:],
            self.sim.data.qvel.flat[:2],
            self.get_body_com("fingertip") - self.get_body_com("target")
        ])
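As a sanity check on the observation layout (a sketch, assuming the reacher.xml above, where nq = 4: two arm hinges plus the two target slides): cos and sin of the 2 joint angles contribute 4 numbers, the target qpos 2, the joint velocities 2, and the fingertip-to-target vector 3, for 11 in total.

# Hypothetical check; requires reacher.xml on the MujocoEnv asset path.
env = ReacherEnv()
ob = env.reset()
assert ob.shape == (11,)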

View File

@ -0,0 +1,75 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class StrikerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        utils.EzPickle.__init__(self)
        self._striked = False
        self._min_strike_dist = np.inf
        self.strike_threshold = 0.1
        mujoco_env.MujocoEnv.__init__(self, 'striker.xml', 5)

    def step(self, a):
        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
        vec_2 = self.get_body_com("object") - self.get_body_com("goal")
        self._min_strike_dist = min(self._min_strike_dist, np.linalg.norm(vec_2))

        if np.linalg.norm(vec_1) < self.strike_threshold:
            self._striked = True
            self._strike_pos = self.get_body_com("tips_arm")

        if self._striked:
            vec_3 = self.get_body_com("object") - self._strike_pos
            reward_near = - np.linalg.norm(vec_3)
        else:
            reward_near = - np.linalg.norm(vec_1)

        reward_dist = - np.linalg.norm(self._min_strike_dist)
        reward_ctrl = - np.square(a).sum()
        reward = 3 * reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False
        return ob, reward, done, dict(reward_dist=reward_dist,
                                      reward_ctrl=reward_ctrl)

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 0
        self.viewer.cam.distance = 4.0

    def reset_model(self):
        self._min_strike_dist = np.inf
        self._striked = False
        self._strike_pos = None

        qpos = self.init_qpos

        self.ball = np.array([0.5, -0.175])
        while True:
            self.goal = np.concatenate([
                self.np_random.uniform(low=0.15, high=0.7, size=1),
                self.np_random.uniform(low=0.1, high=1.0, size=1)])
            if np.linalg.norm(self.ball - self.goal) > 0.17:
                break

        qpos[-9:-7] = [self.ball[1], self.ball[0]]
        qpos[-7:-5] = self.goal
        diff = self.ball - self.goal
        angle = -np.arctan(diff[0] / (diff[1] + 1e-8))
        qpos[-1] = angle / 3.14
        qvel = self.init_qvel + self.np_random.uniform(low=-.1, high=.1,
                                                       size=self.model.nv)
        qvel[7:] = 0
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[:7],
            self.sim.data.qvel.flat[:7],
            self.get_body_com("tips_arm"),
            self.get_body_com("object"),
            self.get_body_com("goal"),
        ])

View File

@ -0,0 +1,31 @@
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
        utils.EzPickle.__init__(self)

    def step(self, a):
        ctrl_cost_coeff = 0.0001
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.sim.data.qpos[0]
        reward_fwd = (xposafter - xposbefore) / self.dt
        reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
        reward = reward_fwd + reward_ctrl
        ob = self._get_obs()
        return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)

    def _get_obs(self):
        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel
        return np.concatenate([qpos.flat[2:], qvel.flat])

    def reset_model(self):
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
        )
        return self._get_obs()

Some files were not shown because too many files have changed in this diff.