Spaces:
Configuration error
Configuration error
File size: 5,304 Bytes
ae22fc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import numpy as np
import gymnasium as gym
from opensimplex import OpenSimplex
import time
# Try to import PyFlyt, else fallback
try:
from PyFlyt.gym_envs.quadx_envs.quadx_waypoints_env import QuadXWaypointsEnv
PYFLYT_AVAILABLE = True
except ImportError:
PYFLYT_AVAILABLE = False
# Create a dummy base class if PyFlyt is missing
class QuadXWaypointsEnv(gym.Env):
metadata = {"render_modes": ["human", "rgb_array"]}
def __init__(self, render_mode=None):
self.render_mode = render_mode
class WindField:
def __init__(self, seed=42, scale=0.1, speed=1.0):
self.noise = OpenSimplex(seed=seed)
self.scale = scale
self.speed = speed
self.time_offset = 0.0
def get_wind(self, x, y, z, dt):
self.time_offset += dt * self.speed
u = self.noise.noise4(x * self.scale, y * self.scale, z * self.scale, self.time_offset)
v = self.noise.noise4(x * self.scale + 100, y * self.scale + 100, z * self.scale, self.time_offset)
w = self.noise.noise4(x * self.scale + 200, y * self.scale + 200, z * self.scale, self.time_offset)
return np.array([u, v, w])
class Drone3DEnv(gym.Env):
def __init__(self, render_mode=None, wind_scale=10.0, wind_speed=1.0):
super().__init__()
self.render_mode = render_mode
self.wind_field = WindField(scale=0.05, speed=wind_speed)
self.wind_strength = wind_scale
# Define spaces
# Obs: [x, y, z, roll, pitch, yaw, u, v, w, p, q, r]
self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32)
# Action: [motor1, motor2, motor3, motor4] or [thrust, roll, pitch, yaw]
# We'll assume simple [thrust_x, thrust_y, thrust_z, yaw] for the mock
self.action_space = gym.spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)
self.state = np.zeros(12)
self.dt = 0.05
self.step_count = 0
self.max_steps = 1000
def reset(self, seed=None, options=None):
super().reset(seed=seed)
self.state = np.zeros(12)
self.state[2] = 10.0 # Start at 10m height
self.step_count = 0
# Randomize Target
# x, y in [-5, 5], z in [5, 15]
self.target = np.random.uniform(low=[-5, -5, 5], high=[5, 5, 15])
# Randomize Wind
# We can just re-initialize the noise with a random seed
new_seed = np.random.randint(0, 10000)
self.wind_field = WindField(seed=new_seed, scale=0.05, speed=self.wind_field.speed)
return self.state.astype(np.float32), {}
def step(self, action):
self.step_count += 1
# Unpack state
pos = self.state[0:3]
vel = self.state[6:9]
# Get Wind
raw_wind = self.wind_field.get_wind(pos[0], pos[1], pos[2], self.dt)
wind_force = raw_wind * self.wind_strength
# Simple Kinematics (Double Integrator)
# Action is roughly acceleration command
# We need enough authority to fight gravity (9.81) + wind
# Let's say max thrust is 20 m/s^2 (~2G)
# Action [-1, 1] -> [-20, 20] ?
# No, usually thrust is positive 0..Max.
# But for simplified "QuadX" control often inputs are roll/pitch/yaw/thrust.
# Here we are abstracting to "Force/Accel command in 3D".
# Let's map action [-1, 1] to [-15, 15] acceleration.
accel = action[:3] * 15.0
# Gravity
gravity = np.array([0, 0, -9.81])
# Total Force = Control + Wind + Gravity
# Note: We REMOVED the "anti-gravity" offset.
# The agent MUST output positive Z acceleration to hover.
# If action[2] is 0, accel[2] is 0, and it falls due to gravity.
total_accel = accel + wind_force + gravity
# Update State
vel += total_accel * self.dt
pos += vel * self.dt
# Floor collision
if pos[2] < 0:
pos[2] = 0
vel[2] = 0 # Crash stop
# Drag (Damping)
vel *= 0.95
self.state[0:3] = pos
self.state[6:9] = vel
# Reward: Stay close to Target
dist = np.linalg.norm(pos - self.target)
# Smoothness: Penalty for high velocity (instability)
vel_mag = np.linalg.norm(vel)
# Components:
# 1. Distance Reward: Higher is better (closer to 0)
r_dist = -dist
# 2. Stability Penalty: Penalize erratic high-speed movements if far from target
# But we need speed to get there. Let's just penalize extreme speed.
r_vel = -0.01 * vel_mag
# 3. Survival Reward: Bonus for not crashing
r_survive = 0.1
reward = r_dist + r_vel + r_survive
# Terminate if crashed or too far
term = False # Let it crash and stay on floor
trunc = self.step_count >= self.max_steps
info = {"wind": wind_force, "target": self.target}
return self.state.astype(np.float32), reward, term, trunc, info
def render(self):
# We will handle rendering in the demo script using matplotlib
pass
|