# EXPERIMENTAL: all may be removed soon

from gym.benchmarks import scoring
from gym.benchmarks.registration import benchmark_spec, register_benchmark, registry, register_benchmark_view # imports used elsewhere

# NOTE: max_timesteps is written as int(2e8) / int(4e7) on purpose. A bare
# ``2e8`` literal is a float, while every other benchmark in this file uses an
# integer step budget; keeping it an int avoids "200000000.0" in output and
# float/int mismatches in consumers that expect a step count.

# Seven Atari games on pixel observations, 200M timesteps per task.
register_benchmark(
    id='Atari200M',
    scorer=scoring.TotalReward(),
    name='Atari200M',
    view_group="Atari",
    description='7 Atari games, with pixel observations',
    tasks=[
        {
            'env_id': 'BeamRiderNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(2e8),
            'reward_floor': 363.9,
            'reward_ceiling': 60000.0,
        },
        {
            'env_id': 'BreakoutNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(2e8),
            'reward_floor': 1.7,
            'reward_ceiling': 800.0,
        },
        {
            'env_id': 'EnduroNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(2e8),
            'reward_floor': 0.0,
            'reward_ceiling': 5000.0,
        },
        {
            'env_id': 'PongNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(2e8),
            'reward_floor': -20.7,
            'reward_ceiling': 21.0,
        },
        {
            'env_id': 'QbertNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(2e8),
            'reward_floor': 163.9,
            'reward_ceiling': 40000.0,
        },
        {
            'env_id': 'SeaquestNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(2e8),
            'reward_floor': 68.4,
            'reward_ceiling': 100000.0,
        },
        {
            'env_id': 'SpaceInvadersNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(2e8),
            'reward_floor': 148.0,
            'reward_ceiling': 30000.0,
        },
    ])

# Same seven games and reward bounds, 40M timesteps per task.
register_benchmark(
    id='Atari40M',
    scorer=scoring.TotalReward(),
    name='Atari40M',
    view_group="Atari",
    description='7 Atari games, with pixel observations',
    tasks=[
        {
            'env_id': 'BeamRiderNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(4e7),
            'reward_floor': 363.9,
            'reward_ceiling': 60000.0,
        },
        {
            'env_id': 'BreakoutNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(4e7),
            'reward_floor': 1.7,
            'reward_ceiling': 800.0,
        },
        {
            'env_id': 'EnduroNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(4e7),
            'reward_floor': 0.0,
            'reward_ceiling': 5000.0,
        },
        {
            'env_id': 'PongNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(4e7),
            'reward_floor': -20.7,
            'reward_ceiling': 21.0,
        },
        {
            'env_id': 'QbertNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(4e7),
            'reward_floor': 163.9,
            'reward_ceiling': 40000.0,
        },
        {
            'env_id': 'SeaquestNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(4e7),
            'reward_floor': 68.4,
            'reward_ceiling': 100000.0,
        },
        {
            'env_id': 'SpaceInvadersNoFrameskip-v3',
            'trials': 1,
            'max_timesteps': int(4e7),
            'reward_floor': 148.0,
            'reward_ceiling': 30000.0,
        },
    ])
scorer=scoring.ClipTo01ThenAverage(), tasks=[ @@ -344,6 +139,7 @@ register_benchmark( id='ClassicControl-v0', name='ClassicControl', + view_group="Control", description='Simple classic control benchmark', scorer=scoring.ClipTo01ThenAverage(), tasks=[ @@ -378,6 +174,7 @@ register_benchmark( id='Mujoco10M-v0', name='Mujoco10M', + view_group="Control", description='Mujoco benchmark with 10M steps', scorer=scoring.ClipTo01ThenAverage(), tasks=[ @@ -406,6 +203,7 @@ register_benchmark( id='Mujoco1M-v0', name='Mujoco1M', + view_group="Control", description='Mujoco benchmark with 1M steps', scorer=scoring.ClipTo01ThenAverage(), tasks=[ @@ -442,6 +240,7 @@ register_benchmark( id='MinecraftEasy-v0', name='MinecraftEasy', + view_group="Minecraft", description='Minecraft easy benchmark', scorer=scoring.ClipTo01ThenAverage(), tasks=[ @@ -474,6 +273,7 @@ register_benchmark( id='MinecraftMedium-v0', name='MinecraftMedium', + view_group="Minecraft", description='Minecraft medium benchmark', scorer=scoring.ClipTo01ThenAverage(), tasks=[ @@ -506,6 +306,7 @@ register_benchmark( id='MinecraftHard-v0', name='MinecraftHard', + view_group="Minecraft", description='Minecraft hard benchmark', scorer=scoring.ClipTo01ThenAverage(), tasks=[ @@ -538,6 +339,7 @@ register_benchmark( id='MinecraftVeryHard-v0', name='MinecraftVeryHard', + view_group="Minecraft", description='Minecraft very hard benchmark', scorer=scoring.ClipTo01ThenAverage(), tasks=[ @@ -558,6 +360,7 @@ register_benchmark( id='MinecraftImpossible-v0', name='MinecraftImpossible', + view_group="Minecraft", description='Minecraft impossible benchmark', scorer=scoring.ClipTo01ThenAverage(), tasks=[ diff --git a/gym/benchmarks/registration.py b/gym/benchmarks/registration.py index 637d7ebb376..7bbc5b0302a 100644 --- a/gym/benchmarks/registration.py +++ b/gym/benchmarks/registration.py @@ -61,12 +61,49 @@ def score_evaluation(self, env_id, data_sources, initial_reset_timestamps, episo def score_benchmark(self, score_map): return 
# A named selection of benchmarks whose results are displayed together in one
# table; ``primary`` is the benchmark id recommended for new submissions.
BenchmarkView = collections.namedtuple("BenchmarkView", ["name", "benchmarks", "primary", "group"])


class Registry(object):
    """Registry of benchmarks and of the views used to display them.

    Attributes:
        benchmarks: OrderedDict mapping benchmark id -> Benchmark.
        benchmark_views: OrderedDict mapping view name -> BenchmarkView.
        benchmark_view_groups: OrderedDict mapping group name -> list of
            BenchmarkView, in registration order.
    """

    def __init__(self):
        self.benchmarks = collections.OrderedDict()
        self.benchmark_views = collections.OrderedDict()
        self.benchmark_view_groups = collections.OrderedDict()

    def register_benchmark_view(self, name, benchmarks, primary, group):
        """Register a view that shows several benchmarks in a single table.

        Sometimes there is very little change between one benchmark and
        another. A BenchmarkView allows results from multiple benchmarks
        to be displayed together.

        name: str
            Name to display on the website. May only contain letters,
            digits, '-' and '_'.
        benchmarks: [str]
            List of benchmark ids to include.
        primary: str
            Primary benchmark id; must be one of ``benchmarks``. This is
            the one shown as the most recent benchmark, to be used when
            submitting future evaluations.
        group: str
            Group in which to display the view on the website. ``None``
            places the view in the "Miscellaneous" group.

        Raises:
            ValueError: if ``name`` contains other characters (or is
                empty), or if ``primary`` is not in ``benchmarks``.
        """
        # Explicit raises instead of ``assert``: asserts are stripped under
        # ``python -O``, which would let invalid views be registered silently.
        if not name.replace("_", '').replace('-', '').isalnum():
            raise ValueError("Name of benchmark must be combination of letters, numbers, - and _")
        if group is None:
            group = "Miscellaneous"
        if primary not in benchmarks:
            raise ValueError(
                "Primary benchmark {!r} must be one of the view's benchmarks: {!r}".format(primary, benchmarks))

        view = BenchmarkView(name=name, benchmarks=benchmarks, primary=primary, group=group)
        # NOTE(review): registering a second view under an existing name
        # replaces the entry in benchmark_views, but the earlier view stays in
        # its group list -- confirm whether that is intended.
        self.benchmark_views[view.name] = view
        self.benchmark_view_groups.setdefault(group, []).append(view)

    def register_benchmark(self, id, scorer, tasks, description=None, name=None, add_view=True, view_group=None):
        """Register a benchmark and, by default, a one-benchmark view for it.

        ``id``, ``scorer``, ``tasks``, ``name`` and ``description`` are passed
        through to ``Benchmark``. When ``add_view`` is true, a view holding
        only this benchmark is registered under ``name`` (or ``id`` when
        ``name`` is None) in ``view_group``.
        """
        self.benchmarks[id] = Benchmark(id=id, scorer=scorer, tasks=tasks, name=name, description=description)
        if add_view:
            self.register_benchmark_view(name=name if name is not None else id,
                                         benchmarks=[id],
                                         primary=id,
                                         group=view_group)

    def benchmark_spec(self, id):
        """Return the benchmark registered under ``id``.

        Raises error.UnregisteredBenchmark when no such benchmark exists.
        """
        # NOTE(review): the two lines between ``try:`` and ``raise`` fall
        # between diff hunks and are not visible here; the lookup and
        # ``except KeyError`` below are the presumed original -- confirm
        # against the full file.
        try:
            return self.benchmarks[id]
        except KeyError:
            raise error.UnregisteredBenchmark('No registered benchmark with id: {}'.format(id))


# Module-level singleton and convenience aliases bound to it.
registry = Registry()
register_benchmark = registry.register_benchmark
register_benchmark_view = registry.register_benchmark_view
benchmark_spec = registry.benchmark_spec