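# imports assumed from the omitted file header; the source modules for the
# project-specific RCPG, SIM_ENV, get_buffer, and evaluate are not shown here
from collections import deque

import numpy as np
import torch

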
def main(args=None):
    """Main training function"""
    action_dim = 2  # number of actions produced by the model
    max_action = 1  # maximum absolute value of output actions
    state_dim = 185  # length of the state vector fed to the network
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu"
    )  # use CUDA if available, otherwise fall back to the CPU
    nr_eval_episodes = 10  # how many episodes to use to run evaluation
    max_epochs = 60  # max number of epochs
    epoch = 0  # starting epoch number
    episodes_per_epoch = 70  # how many episodes to run in a single epoch
    episode = 0  # starting episode number
    train_every_n = 2  # train and update network parameters every n episodes
    training_iterations = 80  # how many batches to use for a single training cycle
    batch_size = 64  # batch size for each training iteration
    max_steps = 300  # maximum number of steps in a single episode
    steps = 0  # starting step number
    load_saved_buffer = False  # whether to load experiences from assets/data.yml
    pretrain = False  # whether to use the loaded experiences to pre-train the model (load_saved_buffer must be True)
    history_len = 10  # number of consecutive states stacked as input for the recurrent model
    pretraining_iterations = 10  # number of training iterations to run during pre-training
    save_every = 10  # save the model every n training cycles
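    # rolling window of the last history_len states, fed as a sequence to the recurrent network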
    state_queue = deque(maxlen=history_len)
    model = RCPG(
        state_dim=state_dim,
        action_dim=action_dim,
        max_action=max_action,
        device=device,
        save_every=save_every,
        load_model=False,
        rnn="gru",
    )  # instantiate a model
    sim = SIM_ENV()  # instantiate environment
    replay_buffer = get_buffer(
        model,
        sim,
        load_saved_buffer,
        pretrain,
        pretraining_iterations,
        training_iterations,
        batch_size,
    )
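    # the buffer stores (state, action, reward, terminal, next_state) transitions for off-policy updates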
    latest_scan, distance, cos, sin, collision, goal, a, reward = sim.step(
        lin_velocity=0.0, ang_velocity=0.0
    )  # take a zero-velocity step to obtain the initial environment state
    fill_state = True  # when True, (re)fill the state history queue at the start of an episode
    while epoch < max_epochs:  # train until max_epochs is reached
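        # each pass through this loop corresponds to a single environment step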
        state, terminal = model.prepare_state(
            latest_scan, distance, cos, sin, collision, goal, a
        )  # build a state representation from the data returned by the environment
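        # at the start of an episode, seed the entire history queue with the first state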
        if fill_state:
            state_queue.clear()
            for _ in range(history_len):
                state_queue.append(state)
            fill_state = False
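        # append the newest state; the deque automatically discards the oldest one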
        state_queue.append(state)
        action = model.get_action(
            np.array(state_queue), True
        )  # get an action from the model
        a_in = [
            (action[0] + 1) / 4,
            action[1],
        ]  # rescale the linear velocity from [-1, 1] to the [0, 0.5] m/s range
        latest_scan, distance, cos, sin, collision, goal, a, reward = sim.step(
            lin_velocity=a_in[0], ang_velocity=a_in[1]
        )  # get data from the environment
        next_state, terminal = model.prepare_state(
            latest_scan, distance, cos, sin, collision, goal, a
        )  # build the next-state representation
        replay_buffer.add(
            state, action, reward, terminal, next_state
        )  # add the experience to the replay buffer
        if (
            terminal or steps == max_steps
        ):  # reset the environment if a terminal state was reached or max_steps were taken
            fill_state = True
            latest_scan, distance, cos, sin, collision, goal, a, reward = sim.reset()
            episode += 1
            if episode % train_every_n == 0:
                model.train(
                    replay_buffer=replay_buffer,
                    iterations=training_iterations,
                    batch_size=batch_size,
                )  # train the model and update its parameters
            steps = 0
        else:
            steps += 1
        if (
            episode + 1
        ) % episodes_per_epoch == 0:  # if an epoch has concluded, run evaluation
            episode = 0
            epoch += 1
            evaluate(
                model,
                epoch,
                sim,
                history_len,
                max_steps,
                eval_episodes=nr_eval_episodes,
            )
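

if __name__ == "__main__":
    main()  # assumed entry point so the training loop can be launched as a script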