Skip to content

Commit

Permalink
retry downloading the medium
Browse files Browse the repository at this point in the history
  • Loading branch information
dixudx committed May 6, 2016
1 parent 6022da4 commit 30a3e81
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 28 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ or videos.
# Socket timeout, in seconds
TIMEOUT = 10
# Maximum number of download attempts per medium
RETRY = 5
# Index of the medium to start from
START = 0
Expand All @@ -82,6 +85,9 @@ MEDIA_NUM = 50
You can set `TIMEOUT` to another value, e.g. 50, according to
your network quality.

If a download fails, the script retries it, making up to `RETRY`
attempts in total (default: 5).

You can also only download photos or videos by commenting

```python
Expand Down
66 changes: 38 additions & 28 deletions tumblr-photo-video-ripper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
# Socket timeout, in seconds
TIMEOUT = 10

# Maximum number of download attempts per medium
RETRY = 5

# Index of the medium to start from
START = 0

Expand All @@ -23,11 +26,11 @@ def download_media(site):
download_videos(site)


def download_videos(site, target_folder=None):
def download_videos(site):
_download_media(site, "video", START)


def download_photos(site, target_folder=None):
def download_photos(site):
_download_media(site, "photo", START)


Expand Down Expand Up @@ -69,6 +72,7 @@ def _handle_medium_url(medium_type, post):


def _download_medium(medium_type, medium_url, folder_name):
socket.setdefaulttimeout(TIMEOUT)
medium_name = medium_url.split("/")[-1]
if medium_type == "video":
if not medium_name.startswith("tumblr"):
Expand All @@ -77,51 +81,57 @@ def _download_medium(medium_type, medium_url, folder_name):

file_path = os.path.join(folder_name, medium_name)
if not os.path.isfile(file_path):
try:
socket.setdefaulttimeout(TIMEOUT)
print("Downloading %s from %s.\n" % (medium_name,
medium_url))
urllib.urlretrieve(medium_url, filename=file_path)
except:
print("Downloading %s from %s.\n" % (medium_name,
medium_url))
retry_times = 0
while retry_times < RETRY:
try:
urllib.urlretrieve(medium_url, filename=file_path)
break
except:
# try again
pass
retry_times += 1
else:
os.remove(file_path)
print("Failed to retrieve %s from %s.\n" % (medium_type,
medium_url))


def usage():
print 'Please create file sites.txt under this same directory'
print 'in sites.txt, specify tumblr sites, separated by comma and no space'
print 'save the file and retry'
print 'Sample: site1,site2'
print ''
print 'Or use command line options'
print 'Sample: python tumblr-photo-video-ripper.py site1,site2'
print ''
print u'未找到sites.txt文件,请创建'
print u'请在文件中指定Tumblr站点名,并以逗号分割,不要有空格'
print u'保存文件并重试'
print u'例子: site1,site2'
print ''
print u'或者使用命令行参数指定站点'
print u'例子: python tumblr-photo-video-ripper.py site1,site2'
print("Please create file sites.txt under this same directory")
print("in sites.txt, specify tumblr sites, separated by comma and no space")
print("save the file and retry")
print("Sample: site1,site2")
print("\n")
print("Or use command line options")
print("Sample: python tumblr-photo-video-ripper.py site1,site2")
print("\n")
print(u"未找到sites.txt文件,请创建")
print(u"请在文件中指定Tumblr站点名,并以逗号分割,不要有空格")
print(u"保存文件并重试")
print(u"例子: site1,site2")
print("\n")
print(u"或者使用命令行参数指定站点")
print(u"例子: python tumblr-photo-video-ripper.py site1,site2")


if __name__ == "__main__":
sites = None

if len(sys.argv) < 2:
# check the sites file
filename = 'sites.txt'
filename = "sites.txt"
if os.path.exists(filename):
with open(filename, 'r') as f:
sites = f.read().rstrip().lstrip().split(',')
with open(filename, "r") as f:
sites = f.read().rstrip().lstrip().split(",")
else:
print usage()
sys.exit(1)
else:
sites = sys.argv[1].split(',')
sites = sys.argv[1].split(",")

if len(sites) == 0 or sites[0] == '':
if len(sites) == 0 or sites[0] == "":
print usage()
sys.exit(1)

Expand Down

0 comments on commit 30a3e81

Please sign in to comment.