-
Notifications
You must be signed in to change notification settings - Fork 21
/
download_process.py
89 lines (70 loc) · 1.99 KB
/
download_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import time
import os
import urllib.request
import requests
from multiprocessing import Process, Queue, Pool, freeze_support
# Text file holding the URLs to download, separated by commas and/or newlines.
# The relative path is resolved against the working directory in effect when
# get_url() opens it.
url_filename = 'url_list.txt'
# Proxy mapping passed to requests.get() for every download.
# NOTE(review): the "https" entry points at an https:// proxy URL; local
# proxies on a port like 1080 usually speak plain HTTP, in which case this
# value should start with http:// — confirm against the proxy actually used.
PROXIES = { "http": "http://127.0.0.1:1080", "https": "https://127.0.0.1:1080" }
# Alternative: swap in the empty mapping below to stop passing an explicit proxy.
# PROXIES = {}
def get_url():
    """Load the list of download URLs from the file named by ``url_filename``.

    Entries may be separated by commas and/or newlines. Whitespace around each
    entry is stripped and empty entries are discarded. The number of entries
    is printed before the list is returned.

    Returns:
        list: The non-empty, stripped entries in file order.
    """
    with open(url_filename, "r") as handle:
        text = handle.read()
    # Turn line breaks into commas so a single split covers both separators.
    pieces = (chunk.strip() for chunk in text.replace("\n", ",").split(","))
    sites = [entry for entry in pieces if entry]
    print('list_len = ',len(sites))
    return sites
def get_filename(url):
    """Return the file-name part of *url*.

    The fragment (``#...``) and the query string (``?...``) are removed first;
    the text after the last ``/`` of what remains is the file name. Removing
    them before looking for ``/`` matters because a query or fragment may
    itself contain slashes (e.g. ``?next=/a/b``), which must not be mistaken
    for path separators.

    Args:
        url: URL (or plain path) string.

    Returns:
        str: The last path segment, or ``""`` when the path ends with ``/``.
    """
    # Cut the fragment before the query: a "?" that appears after "#" is part
    # of the fragment, not the start of a query string.
    path = url.split("#", 1)[0].split("?", 1)[0]
    return path.rsplit("/", 1)[-1]
def download_one(url, timeout=30):
    """Download *url* into the current working directory unless it already exists.

    The target file name is derived from the URL with ``get_filename``. The
    request goes through the module-level ``PROXIES`` mapping. Failures are
    reported on stdout and never raised, so one bad URL does not stop the
    remaining downloads.

    Args:
        url: Address of the file to fetch.
        timeout: Seconds ``requests`` waits for the server to connect or to
            send data before giving up; without it a stalled connection would
            block the worker forever.

    Returns:
        None
    """
    name = get_filename(url)
    if not name:
        # e.g. a URL ending in "/": there is no file name to save under, so
        # skip the request instead of downloading and then failing on open("").
        print('downloading err ->', url, '(no file name in url)')
        return
    if os.path.isfile(name):
        print("file exist")
        return
    try:
        print('downloading ->', name)
        r = requests.get(url, proxies=PROXIES, timeout=timeout)  # use proxy
        # Treat 4xx/5xx as failures; otherwise the server's error page would
        # be saved under the file name and later runs would skip it as
        # "file exist".
        r.raise_for_status()
        with open(name, "wb") as out:
            out.write(r.content)
    except Exception as exc:
        # Deliberately broad: this is the top-level boundary of a pool worker,
        # where an escaping exception would otherwise vanish unreported.
        print('downloading err ->', name, exc)
def chdir():
    """Switch the working directory to ``<cwd>/download``, creating it if needed.

    Raises:
        OSError: If the folder cannot be created or entered (for example when
            a regular file named ``download`` is in the way).
    """
    target_folder = os.path.join(os.getcwd(), 'download')
    # exist_ok=True replaces the isdir()-then-mkdir() pair, which could fail
    # if the folder appeared between the check and the creation.
    os.makedirs(target_folder, exist_ok=True)
    os.chdir(target_folder)
def download(imgs, processes=10):
    """Download all images concurrently using a pool of worker processes.

    Each item of *imgs* is scheduled as one ``download_one`` task. The call
    returns only after every scheduled task has finished.

    Args:
        imgs: Iterable of URL strings.
        processes: Number of worker processes in the pool.

    Returns:
        None
    """
    def report_failure(exc):
        # Called in the parent process for a task that raised in a worker;
        # without it the exception would be stored and never looked at.
        print('downloading err ->', exc)

    pool = Pool(processes)
    try:
        for img in imgs:
            pool.apply_async(download_one, (img, ), error_callback=report_failure)
    finally:
        # Always stop accepting work and wait for what was submitted, so the
        # worker processes are not left behind if scheduling fails part-way.
        pool.close()
        pool.join()
# if __name__ == '__main__':
def main():
    """Run the whole job: read the URL list, download everything, report the time.

    Prints two banner lines, loads the URLs, switches into the download
    folder, runs the concurrent download and finally prints the elapsed
    wall-clock seconds.
    """
    # Note from the original author: calling freeze_support() here resolves
    # the multiprocessing errors seen after packaging the program with
    # pyinstaller. It is currently left disabled.
    print('#3 download_process main()')
    print('downloading...............')
    # The URL list is read before changing directory because its path is
    # relative to the directory the script was started from.
    targets = get_url()
    chdir()
    started = time.time()
    download(targets)
    elapsed = time.time() - started
    print('下载完毕,用时:%s秒' % elapsed)
# Start the job only when this file is executed as a script. The guard also
# keeps multiprocessing workers that re-import this module from running main().
if __name__ == "__main__":
    main()