retry downloading the medium

sorakiseki24 · May 6, 2016 · 30a3e81 · 30a3e81
1 parent 6022da4
commit 30a3e81
Show file tree

Hide file tree

Showing 2 changed files with 44 additions and 28 deletions.
diff --git a/README.md b/README.md
@@ -72,6 +72,9 @@ or videos.
 # Setting timeout
 TIMEOUT = 10
 
+# Retry times
+RETRY = 5
+
 # Medium Index Number that Starts from
 START = 0
 
@@ -82,6 +85,9 @@ MEDIA_NUM = 50
 You can set `TIMEOUT` to another value, e.g. 50, according to
 your network quality.
 
+The script also retries a failed image or video download, making up to
+`RETRY` attempts in total (default value is 5).
+
 You can also only download photos or videos by commenting
 
 ```python

diff --git a/tumblr-photo-video-ripper.py b/tumblr-photo-video-ripper.py
@@ -11,6 +11,9 @@
 # Setting timeout
 TIMEOUT = 10
 
+# Retry times
+RETRY = 5
+
 # Medium Index Number that Starts from
 START = 0
 
@@ -23,11 +26,11 @@ def download_media(site):
     download_videos(site)
 
 
-def download_videos(site, target_folder=None):
+def download_videos(site):
     _download_media(site, "video", START)
 
 
-def download_photos(site, target_folder=None):
+def download_photos(site):
     _download_media(site, "photo", START)
 
 
@@ -69,6 +72,7 @@ def _handle_medium_url(medium_type, post):
 
 
 def _download_medium(medium_type, medium_url, folder_name):
+    socket.setdefaulttimeout(TIMEOUT)
     medium_name = medium_url.split("/")[-1]
     if medium_type == "video":
         if not medium_name.startswith("tumblr"):
@@ -77,51 +81,57 @@ def _download_medium(medium_type, medium_url, folder_name):
 
     file_path = os.path.join(folder_name, medium_name)
     if not os.path.isfile(file_path):
-        try:
-            socket.setdefaulttimeout(TIMEOUT)
-            print("Downloading %s from %s.\n" % (medium_name,
-                                                 medium_url))
-            urllib.urlretrieve(medium_url, filename=file_path)
-        except:
+        print("Downloading %s from %s.\n" % (medium_name,
+                                             medium_url))
+        retry_times = 0
+        while retry_times < RETRY:
+            try:
+                urllib.urlretrieve(medium_url, filename=file_path)
+                break
+            except:
+                # try again
+                pass
+            retry_times += 1
+        else:
             os.remove(file_path)
             print("Failed to retrieve %s from %s.\n" % (medium_type,
                                                         medium_url))
 
 
 def usage():
-    print 'Please create file sites.txt under this same directory'
-    print 'in sites.txt, specify tumblr sites, separated by comma and no space'
-    print 'save the file and retry'
-    print 'Sample: site1,site2'
-    print ''
-    print 'Or use command line options'
-    print 'Sample: python tumblr-photo-video-ripper.py site1,site2'
-    print ''
-    print u'未找到sites.txt文件，请创建'
-    print u'请在文件中指定Tumblr站点名，并以逗号分割，不要有空格'
-    print u'保存文件并重试'
-    print u'例子: site1,site2'
-    print ''
-    print u'或者使用命令行参数指定站点'
-    print u'例子: python tumblr-photo-video-ripper.py site1,site2'
+    print("Please create file sites.txt under this same directory")
+    print("in sites.txt, specify tumblr sites, separated by comma and no space")
+    print("save the file and retry")
+    print("Sample: site1,site2")
+    print("\n")
+    print("Or use command line options")
+    print("Sample: python tumblr-photo-video-ripper.py site1,site2")
+    print("\n")
+    print(u"未找到sites.txt文件，请创建")
+    print(u"请在文件中指定Tumblr站点名，并以逗号分割，不要有空格")
+    print(u"保存文件并重试")
+    print(u"例子: site1,site2")
+    print("\n")
+    print(u"或者使用命令行参数指定站点")
+    print(u"例子: python tumblr-photo-video-ripper.py site1,site2")
 
 
 if __name__ == "__main__":
     sites = None
 
     if len(sys.argv) < 2:
         # check the sites file
-        filename = 'sites.txt'
+        filename = "sites.txt"
         if os.path.exists(filename):
-            with open(filename, 'r') as f:
-                sites = f.read().rstrip().lstrip().split(',')
+            with open(filename, "r") as f:
+                sites = f.read().rstrip().lstrip().split(",")
         else:
             print usage()
             sys.exit(1)
     else:
-        sites = sys.argv[1].split(',')
+        sites = sys.argv[1].split(",")
 
-    if len(sites) == 0 or sites[0] == '':
+    if len(sites) == 0 or sites[0] == "":
         print usage()
         sys.exit(1)