Add descriptions of scorers

* Make null_score a property
* Scorers are proper classes

jietang committed Feb 28, 2017
1 parent ceda767 commit 6f27709
Showing 1 changed file with 56 additions and 24 deletions.

gym/benchmarks/scoring.py: 80 changes (56 additions, 24 deletions)
@@ -45,7 +45,7 @@ def benchmark_aggregate_score(benchmark, env_id_to_benchmark_results):
         else:
             # no matching benchmark result for this trial
             env_scores = scores.setdefault(env_id, [])
-            env_scores.append([benchmark.scorer.null_score() for _ in task_list])
+            env_scores.append([benchmark.scorer.null_score for _ in task_list])
             solves[env_id] = False
 
     score = benchmark.score_benchmark(scores)
@@ -80,6 +80,15 @@ class ClipTo01ThenAverage(object):
     def __init__(self, num_episodes=100):
         self.num_episodes = num_episodes
 
+    @property
+    def description(self):
+        return """
+The scorer takes the average reward over the last {num_episodes} full episodes collected before a certain number of steps or seconds of experience have elapsed for each trial.
+This reward is clipped and normalized to be between 0.0 and 1.0 using thresholds defined on a per-environment basis.
+""".rstrip().format(num_episodes=self.num_episodes)
+
+    @property
     def null_score(self):
         """
         This is used to compute benchmark scores when we are missing an evaluation
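A quick sketch of how the new description property might be consumed; the scorer class and module path come from the diff above, while the surrounding script is an assumed illustration, not code from this commit:

    import gym.benchmarks.scoring as scoring

    scorer = scoring.ClipTo01ThenAverage(num_episodes=100)
    # description is a property, so it is read without call parentheses:
    print(scorer.description)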
@@ -301,6 +310,7 @@ class BenchmarkScoringRule(object):
     def __init__(self, score_and_solved_func):
         self.score_and_solved_func = score_and_solved_func
 
+    @property
     def null_score(self):
         return 0.0
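This hunk is why the first hunk drops the parentheses at the call site: once null_score is declared with @property, attribute access returns the value directly. A minimal sketch of the pattern, using a simplified stand-in rather than the full BenchmarkScoringRule:

    class ScoringRule(object):
        # Simplified stand-in for BenchmarkScoringRule, for illustration only.
        def __init__(self, score_and_solved_func):
            self.score_and_solved_func = score_and_solved_func

        @property
        def null_score(self):
            # Read as rule.null_score, not rule.null_score()
            return 0.0

    rule = ScoringRule(lambda task, reward, elapsed_seconds: (0.0, False))
    assert rule.null_score == 0.0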

@@ -392,32 +402,54 @@ def score_benchmark(self, benchmark, episode_scores):
 
         return np.mean(all_scores)
 
-def TotalReward():
-    def total_reward_from_episode_rewards(task, reward, elapsed_seconds):
-        "TotalReward scoring takes the mean of all rewards earned over the course of the episode and clips it between reward_floor and reward_ceiling"
-        # reward is an array containing valid rewards for the episode
-        floor = task.reward_floor
-        ceiling = task.reward_ceiling
-
-        solved = reward >= ceiling
-        # Sum raw rewards, linearly rescale to between 0 and 1
-        score = np.clip((np.mean(reward) - floor) / (ceiling - floor), 0, 1)
-        return score, solved
-
-    return BenchmarkScoringRule(total_reward_from_episode_rewards)
-
-def RewardPerTime():
-    def reward_per_time_from_episode_rewards(task, reward, elapsed_seconds):
-        "RewardPerTime scoring takes the total reward earned over the course of the episode, divides by the elapsed time, and clips it between reward_floor and reward_ceiling"
-        floor = task.reward_floor
-        ceiling = task.reward_ceiling
-
-        # TODO actually compute solves for this
-        solved = np.zeros(len(reward))
-
-        # Sum the rewards for all episodes, divide by total time taken for all episodes
-        reward_per_second = np.sum(reward) / elapsed_seconds[-1] if np.any(elapsed_seconds) else 0.0
-        score = np.clip((reward_per_second - floor) / (ceiling - floor), 0, 1)
-        return score, solved
-
-    return BenchmarkScoringRule(reward_per_time_from_episode_rewards)
+def total_reward_from_episode_rewards(task, reward, elapsed_seconds):
+    "TotalReward scoring takes the mean of all rewards earned over the course of the episode and clips it between reward_floor and reward_ceiling"
+    # reward is an array containing valid rewards for the episode
+    floor = task.reward_floor
+    ceiling = task.reward_ceiling
+
+    solved = reward >= ceiling
+    # Sum raw rewards, linearly rescale to between 0 and 1
+    score = np.clip((np.mean(reward) - floor) / (ceiling - floor), 0, 1)
+    return score, solved
+
+
+class TotalReward(BenchmarkScoringRule):
+    def __init__(self):
+        super(TotalReward, self).__init__(total_reward_from_episode_rewards)
+
+    @property
+    def description(self):
+        return """
+The scorer takes the average reward over all episodes collected before a certain number of steps or seconds of experience have elapsed for each trial.
+This reward is clipped and normalized to be between 0.0 and 1.0 using thresholds defined on a per-environment basis.
+""".rstrip()
+
+
+def reward_per_time_from_episode_rewards(task, reward, elapsed_seconds):
+    "RewardPerTime scoring takes the total reward earned over the course of the episode, divides by the elapsed time, and clips it between reward_floor and reward_ceiling"
+    floor = task.reward_floor
+    ceiling = task.reward_ceiling
+
+    # TODO actually compute solves for this
+    solved = np.zeros(len(reward))
+
+    # Sum the rewards for all episodes, divide by total time taken for all episodes
+    reward_per_second = np.sum(reward) / elapsed_seconds[-1] if np.any(elapsed_seconds) else 0.0
+    score = np.clip((reward_per_second - floor) / (ceiling - floor), 0, 1)
+    return score, solved
+
+
+class RewardPerTime(BenchmarkScoringRule):
+    def __init__(self):
+        super(RewardPerTime, self).__init__(reward_per_time_from_episode_rewards)
+
+    @property
+    def description(self):
+        return """
+The score is the average reward divided by the number of timesteps across all episodes collected before a certain number of steps or seconds of experience have elapsed for each trial.
+This reward is clipped and normalized to be between 0.0 and 1.0 using thresholds defined on a per-environment basis.
+""".rstrip()
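A worked example of the TotalReward arithmetic defined above; the Task namedtuple is a hypothetical stand-in for gym's benchmark task spec, providing only the two fields the scorer reads:

    import collections
    import numpy as np

    Task = collections.namedtuple('Task', ['reward_floor', 'reward_ceiling'])
    task = Task(reward_floor=0.0, reward_ceiling=100.0)

    rewards = np.array([20.0, 60.0, 100.0])  # per-episode rewards
    floor, ceiling = task.reward_floor, task.reward_ceiling

    # mean(rewards) = 60.0, so score = (60.0 - 0.0) / (100.0 - 0.0) = 0.6
    score = np.clip((np.mean(rewards) - floor) / (ceiling - floor), 0, 1)
    solved = rewards >= ceiling  # array([False, False, True])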
