Add descriptions of scorers

* Make null_score a property
* Scorers are proper classes

jietang committed Feb 28, 2017
1 parent ceda767 commit 6f27709
Showing 1 changed file with 56 additions and 24 deletions.

gym/benchmarks/scoring.py: 80 changes (56 additions, 24 deletions)
@@ -45,7 +45,7 @@ def benchmark_aggregate_score(benchmark, env_id_to_benchmark_results):
         else:
             # no matching benchmark result for this trial
             env_scores = scores.setdefault(env_id, [])
-            env_scores.append([benchmark.scorer.null_score() for _ in task_list])
+            env_scores.append([benchmark.scorer.null_score for _ in task_list])
             solves[env_id] = False
 
     score = benchmark.score_benchmark(scores)
@@ -80,6 +80,15 @@ class ClipTo01ThenAverage(object):
     def __init__(self, num_episodes=100):
         self.num_episodes = num_episodes
 
+    @property
+    def description(self):
+        return """
+The scorer takes the average reward over the last {num_episodes} full episodes collected before a certain number of steps or seconds of experience have elapsed for each trial.
+This reward is clipped and normalized to be between 0.0 and 1.0 using thresholds defined on a per-environment basis.
+""".rstrip().format(num_episodes=self.num_episodes)
+
+    @property
     def null_score(self):
         """
         This is used to compute benchmark scores when we are missing an evaluation
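A quick sketch of how the new description property might be consumed; the scorer class and module path come from the diff above, while the surrounding script is an assumed illustration, not code from this commit:

    import gym.benchmarks.scoring as scoring

    scorer = scoring.ClipTo01ThenAverage(num_episodes=100)
    # description is a property, so it is read without call parentheses:
    print(scorer.description)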
@@ -301,6 +310,7 @@ class BenchmarkScoringRule(object):
     def __init__(self, score_and_solved_func):
         self.score_and_solved_func = score_and_solved_func
 
+    @property
     def null_score(self):
         return 0.0
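This hunk is why the first hunk drops the parentheses at the call site: once null_score is declared with @property, attribute access returns the value directly. A minimal sketch of the pattern, using a simplified stand-in rather than the full BenchmarkScoringRule:

    class ScoringRule(object):
        # Simplified stand-in for BenchmarkScoringRule, for illustration only.
        def __init__(self, score_and_solved_func):
            self.score_and_solved_func = score_and_solved_func

        @property
        def null_score(self):
            # Read as rule.null_score, not rule.null_score()
            return 0.0

    rule = ScoringRule(lambda task, reward, elapsed_seconds: (0.0, False))
    assert rule.null_score == 0.0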

@@ -392,32 +402,54 @@ def score_benchmark(self, benchmark, episode_scores):
 
         return np.mean(all_scores)
 
-def TotalReward():
-    def total_reward_from_episode_rewards(task, reward, elapsed_seconds):
-        "TotalReward scoring takes the mean of all rewards earned over the course of the episode and clips it between reward_floor and reward_ceiling"
-        # reward is an array containing valid rewards for the episode
-        floor = task.reward_floor
-        ceiling = task.reward_ceiling
-
-        solved = reward >= ceiling
-        # Sum raw rewards, linearly rescale to between 0 and 1
-        score = np.clip((np.mean(reward) - floor) / (ceiling - floor), 0, 1)
-        return score, solved
-
-    return BenchmarkScoringRule(total_reward_from_episode_rewards)
-
-def RewardPerTime():
-    def reward_per_time_from_episode_rewards(task, reward, elapsed_seconds):
-        "RewardPerTime scoring takes the total reward earned over the course of the episode, divides by the elapsed time, and clips it between reward_floor and reward_ceiling"
-        floor = task.reward_floor
-        ceiling = task.reward_ceiling
-
-        # TODO actually compute solves for this
-        solved = np.zeros(len(reward))
-
-        # Sum the rewards for all episodes, divide by total time taken for all episodes
-        reward_per_second = np.sum(reward) / elapsed_seconds[-1] if np.any(elapsed_seconds) else 0.0
-        score = np.clip((reward_per_second - floor) / (ceiling - floor), 0, 1)
-        return score, solved
-
-    return BenchmarkScoringRule(reward_per_time_from_episode_rewards)
+def total_reward_from_episode_rewards(task, reward, elapsed_seconds):
+    "TotalReward scoring takes the mean of all rewards earned over the course of the episode and clips it between reward_floor and reward_ceiling"
+    # reward is an array containing valid rewards for the episode
+    floor = task.reward_floor
+    ceiling = task.reward_ceiling
+
+    solved = reward >= ceiling
+    # Sum raw rewards, linearly rescale to between 0 and 1
+    score = np.clip((np.mean(reward) - floor) / (ceiling - floor), 0, 1)
+    return score, solved
+
+
+class TotalReward(BenchmarkScoringRule):
+    def __init__(self):
+        super(TotalReward, self).__init__(total_reward_from_episode_rewards)
+
+    @property
+    def description(self):
+        return """
+The scorer takes the average reward over all episodes collected before a certain number of steps or seconds of experience have elapsed for each trial.
+This reward is clipped and normalized to be between 0.0 and 1.0 using thresholds defined on a per-environment basis.
+""".rstrip()
+
+
+def reward_per_time_from_episode_rewards(task, reward, elapsed_seconds):
+    "RewardPerTime scoring takes the total reward earned over the course of the episode, divides by the elapsed time, and clips it between reward_floor and reward_ceiling"
+    floor = task.reward_floor
+    ceiling = task.reward_ceiling
+
+    # TODO actually compute solves for this
+    solved = np.zeros(len(reward))
+
+    # Sum the rewards for all episodes, divide by total time taken for all episodes
+    reward_per_second = np.sum(reward) / elapsed_seconds[-1] if np.any(elapsed_seconds) else 0.0
+    score = np.clip((reward_per_second - floor) / (ceiling - floor), 0, 1)
+    return score, solved
+
+
+class RewardPerTime(BenchmarkScoringRule):
+    def __init__(self):
+        super(RewardPerTime, self).__init__(reward_per_time_from_episode_rewards)
+
+    @property
+    def description(self):
+        return """
+The score is the average reward divided by the number of timesteps across all episodes collected before a certain number of steps or seconds of experience have elapsed for each trial.
+This reward is clipped and normalized to be between 0.0 and 1.0 using thresholds defined on a per-environment basis.
+""".rstrip()
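A worked example of the TotalReward arithmetic defined above; the Task namedtuple is a hypothetical stand-in for gym's benchmark task spec, providing only the two fields the scorer reads:

    import collections
    import numpy as np

    Task = collections.namedtuple('Task', ['reward_floor', 'reward_ceiling'])
    task = Task(reward_floor=0.0, reward_ceiling=100.0)

    rewards = np.array([20.0, 60.0, 100.0])  # per-episode rewards
    floor, ceiling = task.reward_floor, task.reward_ceiling

    # mean(rewards) = 60.0, so score = (60.0 - 0.0) / (100.0 - 0.0) = 0.6
    score = np.clip((np.mean(rewards) - floor) / (ceiling - floor), 0, 1)
    solved = rewards >= ceiling  # array([False, False, True])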
