
MARL-IR-SIM

robot_nav.SIM_ENV.marl_sim

MARL_SIM

Bases: SIM_ENV

Simulation environment for multi-agent robot navigation using IRSim.

This class extends the SIM_ENV and provides a wrapper for multi-robot simulation and interaction, supporting reward computation and custom reset logic.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `env` | `object` | IRSim simulation environment instance. |
| `robot_goal` | `ndarray` | Current goal position(s) for the robots. |
| `num_robots` | `int` | Number of robots in the environment. |
| `x_range` | `tuple` | World x-range. |
| `y_range` | `tuple` | World y-range. |
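A minimal interaction sketch, assuming the default `multi_robot_world.yaml` resolves for IRSim, `torch` is installed, and the module path shown above is importable; the constant forward action and the all-negative connection logits are placeholders for a learned policy:

```python
import torch

from robot_nav.SIM_ENV.marl_sim import MARL_SIM

sim = MARL_SIM(disable_plotting=True)

# reset() seeds random start states and goals and returns the initial observation.
(poses, distances, coss, sins, collisions, goals,
 action, rewards, positions, goal_positions) = sim.reset()

for _ in range(100):
    # Constant forward motion for every robot; replace with a policy output.
    action = [[0.5, 0.0] for _ in range(sim.num_robots)]
    # Large negative logits => sigmoid < 0.5 => no connection lines are drawn.
    connection = torch.full((sim.num_robots, sim.num_robots - 1), -10.0)
    (poses, distances, coss, sins, collisions, goals,
     action, rewards, positions, goal_positions) = sim.step(action, connection)
```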

Source code in robot_nav/SIM_ENV/marl_sim.py
class MARL_SIM(SIM_ENV):
    """
    Simulation environment for multi-agent robot navigation using IRSim.

    This class extends the SIM_ENV and provides a wrapper for multi-robot
    simulation and interaction, supporting reward computation and custom reset logic.

    Attributes:
        env (object): IRSim simulation environment instance.
        robot_goal (np.ndarray): Current goal position(s) for the robots.
        num_robots (int): Number of robots in the environment.
        x_range (tuple): World x-range.
        y_range (tuple): World y-range.
    """

    def __init__(self, world_file="multi_robot_world.yaml", disable_plotting=False):
        """
        Initialize the MARL_SIM environment.

        Args:
            world_file (str, optional): Path to the world configuration YAML file.
            disable_plotting (bool, optional): If True, disables IRSim rendering and plotting.
        """
        display = False if disable_plotting else True
        self.env = irsim.make(
            world_file, disable_all_plot=disable_plotting, display=display
        )
        robot_info = self.env.get_robot_info(0)
        self.robot_goal = robot_info.goal
        self.num_robots = len(self.env.robot_list)
        self.x_range = self.env._world.x_range
        self.y_range = self.env._world.y_range

    def step(self, action, connection, combined_weights=None):
        """
        Perform a simulation step for all robots using the provided actions and connections.

        Args:
            action (list): List of actions for each robot [[lin_vel, ang_vel], ...].
            connection (Tensor): Tensor of shape (num_robots, num_robots-1) containing logits indicating connections between robots.
            combined_weights (Tensor or None, optional): Optional weights for each connection, shape (num_robots, num_robots-1).

        Returns:
            tuple: (
                poses (list): List of [x, y, theta] for each robot,
                distances (list): Distance to goal for each robot,
                coss (list): Cosine of angle to goal for each robot,
                sins (list): Sine of angle to goal for each robot,
                collisions (list): Collision status for each robot,
                goals (list): Goal reached status for each robot,
                action (list): Actions applied,
                rewards (list): Rewards computed,
                positions (list): Current [x, y] for each robot,
                goal_positions (list): Goal [x, y] for each robot,
            )
        """
        self.env.step(action_id=[i for i in range(self.num_robots)], action=action)
        self.env.render()

        poses = []
        distances = []
        coss = []
        sins = []
        collisions = []
        goals = []
        rewards = []
        positions = []
        goal_positions = []
        robot_states = [
            [self.env.robot_list[i].state[0], self.env.robot_list[i].state[1]]
            for i in range(self.num_robots)
        ]
        for i in range(self.num_robots):

            robot_state = self.env.robot_list[i].state
            closest_robots = [
                np.linalg.norm(
                    [
                        robot_states[j][0] - robot_state[0],
                        robot_states[j][1] - robot_state[1],
                    ]
                )
                for j in range(self.num_robots)
                if j != i
            ]
            robot_goal = self.env.robot_list[i].goal
            goal_vector = [
                robot_goal[0].item() - robot_state[0].item(),
                robot_goal[1].item() - robot_state[1].item(),
            ]
            distance = np.linalg.norm(goal_vector)
            goal = self.env.robot_list[i].arrive
            pose_vector = [np.cos(robot_state[2]).item(), np.sin(robot_state[2]).item()]
            cos, sin = self.cossin(pose_vector, goal_vector)
            collision = self.env.robot_list[i].collision
            action_i = action[i]
            reward = self.get_reward(
                goal, collision, action_i, closest_robots, distance
            )

            position = [robot_state[0].item(), robot_state[1].item()]
            goal_position = [robot_goal[0].item(), robot_goal[1].item()]

            distances.append(distance)
            coss.append(cos)
            sins.append(sin)
            collisions.append(collision)
            goals.append(goal)
            rewards.append(reward)
            positions.append(position)
            poses.append(
                [robot_state[0].item(), robot_state[1].item(), robot_state[2].item()]
            )
            goal_positions.append(goal_position)

            i_probs = torch.sigmoid(
                connection[i]
            )  # connection[i] is logits for "connect" per pair

            # Now we need to insert the self-connection (optional, typically skipped)
            i_connections = i_probs.tolist()
            i_connections.insert(i, 0)
            if combined_weights is not None:
                i_weights = combined_weights[i].tolist()
                i_weights.insert(i, 0)

            for j in range(self.num_robots):
                if i_connections[j] > 0.5:
                    if combined_weights is not None:
                        weight = i_weights[j]
                    else:
                        weight = 1
                    other_robot_state = self.env.robot_list[j].state
                    other_pos = [
                        other_robot_state[0].item(),
                        other_robot_state[1].item(),
                    ]
                    rx = [position[0], other_pos[0]]
                    ry = [position[1], other_pos[1]]
                    self.env.draw_trajectory(
                        np.array([rx, ry]), refresh=True, linewidth=weight
                    )

            if goal:
                self.env.robot_list[i].set_random_goal(
                    obstacle_list=self.env.obstacle_list,
                    init=True,
                    range_limits=[
                        [self.x_range[0] + 1, self.y_range[0] + 1, -3.141592653589793],
                        [self.x_range[1] - 1, self.y_range[1] - 1, 3.141592653589793],
                    ],
                )

        return (
            poses,
            distances,
            coss,
            sins,
            collisions,
            goals,
            action,
            rewards,
            positions,
            goal_positions,
        )

    def reset(
        self,
        robot_state=None,
        robot_goal=None,
        random_obstacles=False,
        random_obstacle_ids=None,
    ):
        """
        Reset the simulation environment and optionally set robot and obstacle positions.

        Args:
            robot_state (list or None, optional): Initial state for robots as [x, y, theta, speed].
            robot_goal (list or None, optional): Goal position(s) for the robots.
            random_obstacles (bool, optional): If True, randomly position obstacles.
            random_obstacle_ids (list or None, optional): IDs of obstacles to randomize.

        Returns:
            tuple: (
                poses (list): List of [x, y, theta] for each robot,
                distances (list): Distance to goal for each robot,
                coss (list): Cosine of angle to goal for each robot,
                sins (list): Sine of angle to goal for each robot,
                collisions (list): All False after reset,
                goals (list): All False after reset,
                action (list): Initial action ([[0.0, 0.0], ...]),
                rewards (list): Rewards for initial state,
                positions (list): Initial [x, y] for each robot,
                goal_positions (list): Initial goal [x, y] for each robot,
            )
        """
        if robot_state is None:
            robot_state = [[random.uniform(3, 9)], [random.uniform(3, 9)], [0]]

        init_states = []
        for robot in self.env.robot_list:
            conflict = True
            while conflict:
                conflict = False
                robot_state = [
                    [random.uniform(3, 9)],
                    [random.uniform(3, 9)],
                    [random.uniform(-3.14, 3.14)],
                ]
                pos = [robot_state[0][0], robot_state[1][0]]
                for loc in init_states:
                    vector = [
                        pos[0] - loc[0],
                        pos[1] - loc[1],
                    ]
                    if np.linalg.norm(vector) < 0.6:
                        conflict = True
            init_states.append(pos)

            robot.set_state(
                state=np.array(robot_state),
                init=True,
            )

        if random_obstacles:
            if random_obstacle_ids is None:
                random_obstacle_ids = [i + self.num_robots for i in range(7)]
            self.env.random_obstacle_position(
                range_low=[self.x_range[0], self.y_range[0], -3.14],
                range_high=[self.x_range[1], self.y_range[1], 3.14],
                ids=random_obstacle_ids,
                non_overlapping=True,
            )

        for robot in self.env.robot_list:
            if robot_goal is None:
                robot.set_random_goal(
                    obstacle_list=self.env.obstacle_list,
                    init=True,
                    range_limits=[
                        [self.x_range[0] + 1, self.y_range[0] + 1, -3.141592653589793],
                        [self.x_range[1] - 1, self.y_range[1] - 1, 3.141592653589793],
                    ],
                )
            else:
                self.env.robot.set_goal(np.array(robot_goal), init=True)
        self.env.reset()
        self.robot_goal = self.env.robot.goal

        action = [[0.0, 0.0] for _ in range(self.num_robots)]
        con = torch.tensor(
            [[0.0 for _ in range(self.num_robots - 1)] for _ in range(self.num_robots)]
        )
        poses, distance, cos, sin, _, _, action, reward, positions, goal_positions = (
            self.step(action, con)
        )
        return (
            poses,
            distance,
            cos,
            sin,
            [False] * self.num_robots,
            [False] * self.num_robots,
            action,
            reward,
            positions,
            goal_positions,
        )

    @staticmethod
    def get_reward(goal, collision, action, closest_robots, distance, phase=1):
        """
        Calculate the reward for a robot given the current state and action.

        Args:
            goal (bool): Whether the robot reached its goal.
            collision (bool): Whether a collision occurred.
            action (list): [linear_velocity, angular_velocity] applied.
            closest_robots (list): Distances to the closest other robots.
            distance (float): Distance to the goal.
            phase (int, optional): Reward phase/function selector (default: 1).

        Returns:
            float: Computed reward.
        """

        match phase:
            case 1:
                if goal:
                    return 100.0
                elif collision:
                    return -100.0 * 3 * action[0]
                else:
                    r_dist = 1.5 / distance
                    cl_pen = 0
                    for rob in closest_robots:
                        add = 1.5 - rob if rob < 1.5 else 0
                        cl_pen += add

                    return action[0] - 0.5 * abs(action[1]) - cl_pen + r_dist

            case 2:
                if goal:
                    return 70.0
                elif collision:
                    return -100.0 * 3 * action[0]
                else:
                    cl_pen = 0
                    for rob in closest_robots:
                        add = (3 - rob) ** 2 if rob < 3 else 0
                        cl_pen += add

                    return -0.5 * abs(action[1]) - cl_pen

            case _:
                raise ValueError("Unknown reward phase")

__init__(world_file='multi_robot_world.yaml', disable_plotting=False)

Initialize the MARL_SIM environment.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `world_file` | `str` | Path to the world configuration YAML file. | `'multi_robot_world.yaml'` |
| `disable_plotting` | `bool` | If True, disables IRSim rendering and plotting. | `False` |
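As the source below shows, `disable_plotting` is forwarded to `irsim.make` as `disable_all_plot` and also negated into `display`. A brief sketch of both modes (hypothetical variable names):

```python
# Rendering enabled (default): useful for visually debugging trajectories.
sim_vis = MARL_SIM(world_file="multi_robot_world.yaml", disable_plotting=False)

# Headless: IRSim plotting and display are both switched off, e.g. for training runs.
sim_headless = MARL_SIM(world_file="multi_robot_world.yaml", disable_plotting=True)

print(sim_headless.num_robots, sim_headless.x_range, sim_headless.y_range)
```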
Source code in robot_nav/SIM_ENV/marl_sim.py
def __init__(self, world_file="multi_robot_world.yaml", disable_plotting=False):
    """
    Initialize the MARL_SIM environment.

    Args:
        world_file (str, optional): Path to the world configuration YAML file.
        disable_plotting (bool, optional): If True, disables IRSim rendering and plotting.
    """
    display = False if disable_plotting else True
    self.env = irsim.make(
        world_file, disable_all_plot=disable_plotting, display=display
    )
    robot_info = self.env.get_robot_info(0)
    self.robot_goal = robot_info.goal
    self.num_robots = len(self.env.robot_list)
    self.x_range = self.env._world.x_range
    self.y_range = self.env._world.y_range

get_reward(goal, collision, action, closest_robots, distance, phase=1) staticmethod

Calculate the reward for a robot given the current state and action.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `goal` | `bool` | Whether the robot reached its goal. | required |
| `collision` | `bool` | Whether a collision occurred. | required |
| `action` | `list` | [linear_velocity, angular_velocity] applied. | required |
| `closest_robots` | `list` | Distances to the closest other robots. | required |
| `distance` | `float` | Distance to the goal. | required |
| `phase` | `int` | Reward phase/function selector (default: 1). | `1` |

Returns:

| Type | Description |
| --- | --- |
| `float` | Computed reward. |
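As a worked illustration of the default phase-1 shaping (values chosen arbitrarily): with linear velocity 0.5, angular velocity 0.2, one neighbour 1.0 m away and the goal 3.0 m away, the crowding penalty is 1.5 - 1.0 = 0.5, the distance bonus is 1.5 / 3.0 = 0.5, and the reward is 0.5 - 0.5 * 0.2 - 0.5 + 0.5 = 0.4:

```python
# get_reward is a @staticmethod, so it can be probed without an environment.
r = MARL_SIM.get_reward(
    goal=False,
    collision=False,
    action=[0.5, 0.2],          # [linear_velocity, angular_velocity]
    closest_robots=[1.0, 2.5],  # only the 1.0 m neighbour is within 1.5 m
    distance=3.0,
    phase=1,
)
print(r)  # ~0.4
```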

Source code in robot_nav/SIM_ENV/marl_sim.py
@staticmethod
def get_reward(goal, collision, action, closest_robots, distance, phase=1):
    """
    Calculate the reward for a robot given the current state and action.

    Args:
        goal (bool): Whether the robot reached its goal.
        collision (bool): Whether a collision occurred.
        action (list): [linear_velocity, angular_velocity] applied.
        closest_robots (list): Distances to the closest other robots.
        distance (float): Distance to the goal.
        phase (int, optional): Reward phase/function selector (default: 1).

    Returns:
        float: Computed reward.
    """

    match phase:
        case 1:
            if goal:
                return 100.0
            elif collision:
                return -100.0 * 3 * action[0]
            else:
                r_dist = 1.5 / distance
                cl_pen = 0
                for rob in closest_robots:
                    add = 1.5 - rob if rob < 1.5 else 0
                    cl_pen += add

                return action[0] - 0.5 * abs(action[1]) - cl_pen + r_dist

        case 2:
            if goal:
                return 70.0
            elif collision:
                return -100.0 * 3 * action[0]
            else:
                cl_pen = 0
                for rob in closest_robots:
                    add = (3 - rob) ** 2 if rob < 3 else 0
                    cl_pen += add

                return -0.5 * abs(action[1]) - cl_pen

        case _:
            raise ValueError("Unknown reward phase")

reset(robot_state=None, robot_goal=None, random_obstacles=False, random_obstacle_ids=None)

Reset the simulation environment and optionally set robot and obstacle positions.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `robot_state` | `list or None` | Initial state for robots as [x, y, theta, speed]. | `None` |
| `robot_goal` | `list or None` | Goal position(s) for the robots. | `None` |
| `random_obstacles` | `bool` | If True, randomly position obstacles. | `False` |
| `random_obstacle_ids` | `list or None` | IDs of obstacles to randomize. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `tuple` | (poses, distances, coss, sins, collisions, goals, action, rewards, positions, goal_positions) |

- poses (list): [x, y, theta] for each robot
- distances (list): Distance to goal for each robot
- coss (list): Cosine of angle to goal for each robot
- sins (list): Sine of angle to goal for each robot
- collisions (list): All False after reset
- goals (list): All False after reset
- action (list): Initial action ([[0.0, 0.0], ...])
- rewards (list): Rewards for initial state
- positions (list): Initial [x, y] for each robot
- goal_positions (list): Initial goal [x, y] for each robot
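A sketch of a typical reset, assuming `sim` is a MARL_SIM instance as above: start states and goals are sampled by the environment, and obstacle positions are randomized (the obstacle IDs default to the seven entries listed after the robots):

```python
(poses, distances, coss, sins, collisions, goals,
 action, rewards, positions, goal_positions) = sim.reset(random_obstacles=True)

# Directly after a reset, no robot has collided or reached its goal.
assert collisions == [False] * sim.num_robots
assert goals == [False] * sim.num_robots
```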

Source code in robot_nav/SIM_ENV/marl_sim.py
def reset(
    self,
    robot_state=None,
    robot_goal=None,
    random_obstacles=False,
    random_obstacle_ids=None,
):
    """
    Reset the simulation environment and optionally set robot and obstacle positions.

    Args:
        robot_state (list or None, optional): Initial state for robots as [x, y, theta, speed].
        robot_goal (list or None, optional): Goal position(s) for the robots.
        random_obstacles (bool, optional): If True, randomly position obstacles.
        random_obstacle_ids (list or None, optional): IDs of obstacles to randomize.

    Returns:
        tuple: (
            poses (list): List of [x, y, theta] for each robot,
            distances (list): Distance to goal for each robot,
            coss (list): Cosine of angle to goal for each robot,
            sins (list): Sine of angle to goal for each robot,
            collisions (list): All False after reset,
            goals (list): All False after reset,
            action (list): Initial action ([[0.0, 0.0], ...]),
            rewards (list): Rewards for initial state,
            positions (list): Initial [x, y] for each robot,
            goal_positions (list): Initial goal [x, y] for each robot,
        )
    """
    if robot_state is None:
        robot_state = [[random.uniform(3, 9)], [random.uniform(3, 9)], [0]]

    init_states = []
    for robot in self.env.robot_list:
        conflict = True
        while conflict:
            conflict = False
            robot_state = [
                [random.uniform(3, 9)],
                [random.uniform(3, 9)],
                [random.uniform(-3.14, 3.14)],
            ]
            pos = [robot_state[0][0], robot_state[1][0]]
            for loc in init_states:
                vector = [
                    pos[0] - loc[0],
                    pos[1] - loc[1],
                ]
                if np.linalg.norm(vector) < 0.6:
                    conflict = True
        init_states.append(pos)

        robot.set_state(
            state=np.array(robot_state),
            init=True,
        )

    if random_obstacles:
        if random_obstacle_ids is None:
            random_obstacle_ids = [i + self.num_robots for i in range(7)]
        self.env.random_obstacle_position(
            range_low=[self.x_range[0], self.y_range[0], -3.14],
            range_high=[self.x_range[1], self.y_range[1], 3.14],
            ids=random_obstacle_ids,
            non_overlapping=True,
        )

    for robot in self.env.robot_list:
        if robot_goal is None:
            robot.set_random_goal(
                obstacle_list=self.env.obstacle_list,
                init=True,
                range_limits=[
                    [self.x_range[0] + 1, self.y_range[0] + 1, -3.141592653589793],
                    [self.x_range[1] - 1, self.y_range[1] - 1, 3.141592653589793],
                ],
            )
        else:
            self.env.robot.set_goal(np.array(robot_goal), init=True)
    self.env.reset()
    self.robot_goal = self.env.robot.goal

    action = [[0.0, 0.0] for _ in range(self.num_robots)]
    con = torch.tensor(
        [[0.0 for _ in range(self.num_robots - 1)] for _ in range(self.num_robots)]
    )
    poses, distance, cos, sin, _, _, action, reward, positions, goal_positions = (
        self.step(action, con)
    )
    return (
        poses,
        distance,
        cos,
        sin,
        [False] * self.num_robots,
        [False] * self.num_robots,
        action,
        reward,
        positions,
        goal_positions,
    )

step(action, connection, combined_weights=None)

Perform a simulation step for all robots using the provided actions and connections.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `action` | `list` | List of actions for each robot [[lin_vel, ang_vel], ...]. | required |
| `connection` | `Tensor` | Tensor of shape (num_robots, num_robots-1) containing logits indicating connections between robots. | required |
| `combined_weights` | `Tensor or None` | Optional weights for each connection, shape (num_robots, num_robots-1). | `None` |

Returns:

| Type | Description |
| --- | --- |
| `tuple` | (poses, distances, coss, sins, collisions, goals, action, rewards, positions, goal_positions) |

- poses (list): [x, y, theta] for each robot
- distances (list): Distance to goal for each robot
- coss (list): Cosine of angle to goal for each robot
- sins (list): Sine of angle to goal for each robot
- collisions (list): Collision status for each robot
- goals (list): Goal reached status for each robot
- action (list): Actions applied
- rewards (list): Rewards computed
- positions (list): Current [x, y] for each robot
- goal_positions (list): Goal [x, y] for each robot
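A sketch of a single step, assuming `sim` is a MARL_SIM instance as above: logits above 0 pass the sigmoid threshold of 0.5, so here robot 0 is linked to its first listed peer, and `combined_weights` (if given) sets the line width of each drawn connection:

```python
import torch

n = sim.num_robots
action = [[0.5, 0.1] for _ in range(n)]

# Per-robot logits over the other n-1 robots; > 0 means "connected" after sigmoid.
connection = torch.full((n, n - 1), -10.0)
connection[0, 0] = 10.0              # connect robot 0 to its first listed peer

weights = torch.ones((n, n - 1))     # optional line widths for drawn connections

(poses, distances, coss, sins, collisions, goals,
 action, rewards, positions, goal_positions) = sim.step(
    action, connection, combined_weights=weights
)
```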

Source code in robot_nav/SIM_ENV/marl_sim.py
def step(self, action, connection, combined_weights=None):
    """
    Perform a simulation step for all robots using the provided actions and connections.

    Args:
        action (list): List of actions for each robot [[lin_vel, ang_vel], ...].
        connection (Tensor): Tensor of shape (num_robots, num_robots-1) containing logits indicating connections between robots.
        combined_weights (Tensor or None, optional): Optional weights for each connection, shape (num_robots, num_robots-1).

    Returns:
        tuple: (
            poses (list): List of [x, y, theta] for each robot,
            distances (list): Distance to goal for each robot,
            coss (list): Cosine of angle to goal for each robot,
            sins (list): Sine of angle to goal for each robot,
            collisions (list): Collision status for each robot,
            goals (list): Goal reached status for each robot,
            action (list): Actions applied,
            rewards (list): Rewards computed,
            positions (list): Current [x, y] for each robot,
            goal_positions (list): Goal [x, y] for each robot,
        )
    """
    self.env.step(action_id=[i for i in range(self.num_robots)], action=action)
    self.env.render()

    poses = []
    distances = []
    coss = []
    sins = []
    collisions = []
    goals = []
    rewards = []
    positions = []
    goal_positions = []
    robot_states = [
        [self.env.robot_list[i].state[0], self.env.robot_list[i].state[1]]
        for i in range(self.num_robots)
    ]
    for i in range(self.num_robots):

        robot_state = self.env.robot_list[i].state
        closest_robots = [
            np.linalg.norm(
                [
                    robot_states[j][0] - robot_state[0],
                    robot_states[j][1] - robot_state[1],
                ]
            )
            for j in range(self.num_robots)
            if j != i
        ]
        robot_goal = self.env.robot_list[i].goal
        goal_vector = [
            robot_goal[0].item() - robot_state[0].item(),
            robot_goal[1].item() - robot_state[1].item(),
        ]
        distance = np.linalg.norm(goal_vector)
        goal = self.env.robot_list[i].arrive
        pose_vector = [np.cos(robot_state[2]).item(), np.sin(robot_state[2]).item()]
        cos, sin = self.cossin(pose_vector, goal_vector)
        collision = self.env.robot_list[i].collision
        action_i = action[i]
        reward = self.get_reward(
            goal, collision, action_i, closest_robots, distance
        )

        position = [robot_state[0].item(), robot_state[1].item()]
        goal_position = [robot_goal[0].item(), robot_goal[1].item()]

        distances.append(distance)
        coss.append(cos)
        sins.append(sin)
        collisions.append(collision)
        goals.append(goal)
        rewards.append(reward)
        positions.append(position)
        poses.append(
            [robot_state[0].item(), robot_state[1].item(), robot_state[2].item()]
        )
        goal_positions.append(goal_position)

        i_probs = torch.sigmoid(
            connection[i]
        )  # connection[i] is logits for "connect" per pair

        # Now we need to insert the self-connection (optional, typically skipped)
        i_connections = i_probs.tolist()
        i_connections.insert(i, 0)
        if combined_weights is not None:
            i_weights = combined_weights[i].tolist()
            i_weights.insert(i, 0)

        for j in range(self.num_robots):
            if i_connections[j] > 0.5:
                if combined_weights is not None:
                    weight = i_weights[j]
                else:
                    weight = 1
                other_robot_state = self.env.robot_list[j].state
                other_pos = [
                    other_robot_state[0].item(),
                    other_robot_state[1].item(),
                ]
                rx = [position[0], other_pos[0]]
                ry = [position[1], other_pos[1]]
                self.env.draw_trajectory(
                    np.array([rx, ry]), refresh=True, linewidth=weight
                )

        if goal:
            self.env.robot_list[i].set_random_goal(
                obstacle_list=self.env.obstacle_list,
                init=True,
                range_limits=[
                    [self.x_range[0] + 1, self.y_range[0] + 1, -3.141592653589793],
                    [self.x_range[1] - 1, self.y_range[1] - 1, 3.141592653589793],
                ],
            )

    return (
        poses,
        distances,
        coss,
        sins,
        collisions,
        goals,
        action,
        rewards,
        positions,
        goal_positions,
    )