class CustomEnv(gym.Env):
    """Template for a custom environment that follows the gym interface.

    This is a skeleton: every ``...`` marks a spot where the concrete
    environment logic has to be filled in. ``N_DISCRETE_ACTIONS``,
    ``N_CHANNELS``, ``HEIGHT`` and ``WIDTH`` are placeholders that must be
    defined by the surrounding module.
    """

    metadata = {"render.modes": ["human"]}

    def __init__(self, arg1, arg2):
        """Declare the action and observation spaces.

        ``arg1`` and ``arg2`` stand in for whatever constructor parameters
        the concrete environment needs; add further parameters as required.
        """
        super().__init__()
        # Define action and observation space.
        # They must be gym.spaces objects.
        # Example when using discrete actions:
        self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        # Example for using image as input
        # (channel-first; channel-last also works):
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(N_CHANNELS, HEIGHT, WIDTH),
            dtype=np.uint8,
        )

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``."""
        ...
        return observation, reward, done, info

    def reset(self):
        """Start a new episode and return the initial observation only."""
        ...
        # reward, done, info can't be included in the return value of reset().
        return observation

    def render(self, mode="human"):
        """Render the environment; ``mode`` defaults to ``"human"``."""
        ...

    def close(self):
        """Release any resources held by the environment."""
        ...
环境测试
(1)SB3 提供了 `check_env` 工具,用于检查自定义环境是否符合 Gym 接口规范(动作/观测空间的定义、`reset` 与 `step` 的返回值等):
from stable_baselines3.common.env_checker import check_env
from snakeenv import SnekEnv  # import the class from the custom environment module written earlier

# Instantiate the custom environment and run the SB3 checker on it.
env = SnekEnv()
check_env(env)
(2)二次自测
通过模拟实际训练的过程(用随机动作与环境交互若干回合),测试环境是否可用:
from snakeenv import SnekEnv

# Smoke-test the environment by running random-action episodes,
# mimicking the interaction loop used during training.
env = SnekEnv()
episodes = 50

for ep in range(episodes):
    obs = env.reset()
    done = False
    while not done:
        # Random sampling from the action space is sufficient here.
        random_action = env.action_space.sample()
        print("action", random_action)
        # Check that the environment returns well-formed feedback.
        obs, reward, done, info = env.step(random_action)
        print("reward", reward)