Skip to content

Commit

Permalink
Add retries to benchmark upload
Browse files Browse the repository at this point in the history
  • Loading branch information
jietang committed Mar 6, 2017
1 parent 07d9885 commit 28913da
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 1 deletion.
4 changes: 4 additions & 0 deletions gym/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,7 @@ class DoubleWrapperError(Error):

class WrapAfterConfigureError(Error):
    # NOTE(review): name suggests this is raised when a wrapper is applied
    # after configure() has run — the raising call site is not visible here;
    # confirm against callers before relying on this description.
    pass


class RetriesExceededError(Error):
    """Raised when an operation has exhausted its allowed number of retries.

    Raised by gym.scoreboard.client.util.retry_exponential_backoff with a
    message aggregating every individual error caught along the way.
    """
    pass
8 changes: 7 additions & 1 deletion gym/scoreboard/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,13 @@ def _upload_benchmark(training_dir, algorithm_id, benchmark_id, benchmark_run_ta
# Actually do the uploads.
for training_dir in directories:
# N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
_upload(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos)
_upload_with_retries = util.retry_exponential_backoff(
_upload,
(error.APIConnectionError,),
max_retries=5,
interval=3,
)
_upload_with_retries(training_dir, None, None, benchmark_run_id, api_key, ignore_open_monitors, skip_videos)

logger.info("""
****************************************************
Expand Down
31 changes: 31 additions & 0 deletions gym/scoreboard/client/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import functools
import logging
import os
import random
import sys
import time

from gym import error

logger = logging.getLogger(__name__)

Expand All @@ -12,3 +17,29 @@ def utf8(value):

def file_size(f):
    """Return the size in bytes of the open file object *f*.

    Uses fstat on the underlying file descriptor, so the result reflects
    the file on disk regardless of the object's current seek position.
    """
    stat_result = os.fstat(f.fileno())
    return stat_result.st_size

def retry_exponential_backoff(f, errors, max_retries=5, interval=1):
    """Wrap *f* so that it is retried with randomized exponential backoff.

    Args:
        f: Callable to invoke.
        errors: Exception type (or tuple of types) that triggers a retry;
            any other exception propagates immediately.
        max_retries: Number of retries allowed after the initial attempt.
        interval: Base delay multiplier in seconds.

    Returns:
        A wrapped callable with the same signature as *f*. It raises
        error.RetriesExceededError (with every caught error's message
        aggregated) once max_retries retries have been exhausted.
    """
    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        num_retries = 0
        caught_errors = []
        while True:
            try:
                # Success: return directly rather than break-then-return.
                return f(*args, **kwargs)
            except errors as e:
                # Lazy %-style logging args: formatting is skipped when
                # the logger is disabled for this level.
                logger.error("Caught error in %s: %s", f.__name__, e)
                caught_errors.append(e)

                if num_retries >= max_retries:
                    msg = "Exceeded allowed retries. Here are the individual error messages:\n\n"
                    msg += "\n\n".join("%s: %s" % (type(err).__name__, str(err)) for err in caught_errors)
                    raise error.RetriesExceededError(msg)

                # Full-jitter backoff: sleep between `interval` and
                # interval * 2**num_retries seconds.
                backoff = random.randint(1, 2 ** num_retries) * interval
                logger.error("Retrying in %.1fs...", backoff)
                time.sleep(backoff)
                num_retries += 1
    return wrapped

0 comments on commit 28913da

Please sign in to comment.