Environment setup:
# Build the environment from the preset, then instantiate the agent class
# whose name is given in the preset (e.g. "DQNAgent").
env_instance = create_environment(tuning_parameters)
# NOTE(review): eval() on a config-supplied class-name string executes
# arbitrary code if the preset is untrusted — a registry lookup would be safer.
agent = eval(tuning_parameters.agent.type + '(env_instance, tuning_parameters)')
def create_environment(tuning_parameters):
    """Instantiate the environment wrapper named in the preset.

    :param tuning_parameters: preset object carrying ``env.type``
    :return: a constructed environment wrapper instance
    """
    # verify() maps the preset's env type to a (name, class-name) pair
    env_type_name, env_type = EnvTypes().verify(tuning_parameters.env.type)
    # NOTE: eval() resolves the class-name string to a class object;
    # this is only safe while presets are trusted input.
    env_class = eval(env_type)
    return env_class(tuning_parameters)
class GymEnvironmentWrapper(EnvironmentWrapper):
    # Concrete wrapper adapting an OpenAI Gym environment to the
    # EnvironmentWrapper interface (remainder of the body not shown here).
    def __init__(self, tuning_parameters):
        """Initialize the shared base-class state; Gym-specific
        parameters are set up after the base initializer runs.

        :param tuning_parameters: the preset shared across the stack
        :type tuning_parameters: Preset
        """
        EnvironmentWrapper.__init__(self, tuning_parameters)
        # env parameters
class EnvironmentWrapper(object):
    # Base class for concrete environment wrappers: holds the state shared
    # by every environment type plus a reference to the preset (self.tp).
    def __init__(self, tuning_parameters):
        """
        Initialize the common environment state from a preset.

        :param tuning_parameters: preset carrying env/visualization settings
        :type tuning_parameters: Preset
        """
        # env initialization
        self.game = []
        self.actions = {}  # presumably action-index -> env-native action; filled by subclasses — verify
        self.state = []
        self.reward = 0
        self.done = False  # episode-termination flag
        self.default_action = 0
        self.last_action_idx = 0
        self.episode_idx = 0
        self.last_episode_time = time.time()
        self.info = []
        # action-space description — defaults here, overwritten by subclasses
        self.action_space_low = 0
        self.action_space_high = 0
        self.action_space_abs_range = 0
        self.actions_description = {}
        self.discrete_controls = True
        self.action_space_size = 0
        self.key_to_action = {}  # keyboard mapping used for human control
        # observation-space description
        self.width = 1
        self.height = 1
        self.is_state_type_image = True
        self.measurements_size = 0
        self.phase = RunPhase.TRAIN
        # values copied out of the preset for convenient access
        self.tp = tuning_parameters
        self.record_video_every = self.tp.visualization.record_video_every
        self.env_id = self.tp.env.level
        self.video_path = self.tp.visualization.video_path
        self.is_rendered = self.tp.visualization.render
        self.seed = self.tp.seed
        self.frame_skip = self.tp.env.frame_skip
        self.human_control = self.tp.env.human_control
        self.wait_for_explicit_human_action = False
        # human control implies rendering, so force is_rendered on in that case
        self.is_rendered = self.is_rendered or self.human_control
        self.game_is_open = True
        self.renderer = Renderer()
@property
def measurements(self):
assert False
@measurements.setter
def measurements(self, value):
assert False
@property
def observation(self):
assert False
@observation.setter
def observation(self, value):
assert False
def _idx_to_action(self, action_idx):
"""
class EnvRegistry(object):
"""Register an env by ID. IDs remain stable over time and are
(about 797 environments are registered here)
Agent setup:
!!!
if 'action_intrinsic_reward' in action_info.keys():
Below is the inheritance hierarchy of the three classes and their method overrides.
class PolicyOptimizationAgent(Agent):
Once the agent is fully initialized, the main loop starts running:
# Instantiate the agent class named in the preset with the environment.
# NOTE(review): eval() on a config-supplied class-name string executes
# arbitrary code if the preset is untrusted — a registry lookup would be safer.
agent = eval(tuning_parameters.agent.type + '(env_instance, tuning_parameters)')
# Start the training or evaluation
if tuning_parameters.evaluate:
    agent.evaluate(sys.maxsize, keep_networks_synced=True)  # evaluate forever
else:
    agent.improve()
That is the basic flow at a high level; later sections refine the details.