Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[YouPornIE] Extract all encrypted links and remove doubles at the end #1657

Merged
merged 2 commits into from
Oct 28, 2013
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
[YouPornIE] Extract all encrypted links and remove doubles at the end
  • Loading branch information
rzhxeo committed Oct 26, 2013
commit 7df286540f893f7fbba07da8ba3b09dd7c9027c4
2 changes: 1 addition & 1 deletion youtube_dl/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def process_video_result(self, info_dict, download=True):
info_dict['playlist_index'] = None

# These extractors handle format selection themselves
if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
if info_dict['extractor'] in [u'youtube', u'Youku', u'mixcloud']:
if download:
self.process_info(info_dict)
return info_dict
Expand Down
78 changes: 26 additions & 52 deletions youtube_dl/extractor/youporn.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,6 @@ class YouPornIE(InfoExtractor):
}
}

def _print_formats(self, formats):
"""Print all available formats"""
print(u'Available formats:')
print(u'ext\t\tformat')
print(u'---------------------------------')
for format in formats:
print(u'%s\t\t%s' % (format['ext'], format['format']))

def _specific(self, req_format, formats):
for x in formats:
if x["format"] == req_format:
return x
return None

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
Expand All @@ -71,27 +57,22 @@ def _real_extract(self, url):
except KeyError:
raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])

# Get all of the formats available
# Get all of the links from the page
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
webpage, u'download list').strip()

# Get all of the links from the page
LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
LINK_RE = r'<a href="([^"]+)">'
links = re.findall(LINK_RE, download_list_html)

# Get link of hd video if available
mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
if mobj != None:
encrypted_video_url = mobj.group(u'encrypted_video_url')
video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
links = [video_url] + links

# Get all encrypted links
encrypted_links = re.findall(r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';', webpage)
for encrypted_link in encrypted_links:
link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
links.append(link)

if not links:
raise ExtractorError(u'ERROR: no known formats available for video')

self.to_screen(u'Links found: %d' % len(links))

formats = []
for link in links:

Expand All @@ -103,39 +84,32 @@ def _real_extract(self, url):
path = compat_urllib_parse_urlparse( video_url ).path
extension = os.path.splitext( path )[1][1:]
format = path.split('/')[4].split('_')[:2]

# size = format[0]
# bitrate = format[1]
format = "-".join( format )
# title = u'%s-%s-%s' % (video_title, size, bitrate)

formats.append({
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'upload_date': upload_date,
'title': video_title,
'ext': extension,
'format': format,
'thumbnail': thumbnail,
'description': video_description,
'age_limit': age_limit,
'format_id': format,
})

if self._downloader.params.get('listformats', None):
self._print_formats(formats)
return

req_format = self._downloader.params.get('format', 'best')
self.to_screen(u'Format: %s' % req_format)

if req_format is None or req_format == 'best':
return [formats[0]]
elif req_format == 'worst':
return [formats[-1]]
elif req_format in ('-1', 'all'):
return formats
else:
format = self._specific( req_format, formats )
if format is None:
raise ExtractorError(u'Requested format not available')
return [format]
# Sort and remove duplicates
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
for i in range(len(formats)-1,0,-1):
if formats[i]['format_id'] == formats[i-1]['format_id']:
del formats[i]

return {
'id': video_id,
'uploader': video_uploader,
'upload_date': upload_date,
'title': video_title,
'thumbnail': thumbnail,
'description': video_description,
'age_limit': age_limit,
'formats': formats,
}