Merge branch 'master' of https://github.com/rg3/youtube-dl into bilibili
commit
640bb54e73
185 changed files with 3882 additions and 1665 deletions
@ -0,0 +1,57 @@ |
||||
from __future__ import unicode_literals |
||||
|
||||
from .common import InfoExtractor |
||||
from ..utils import ( |
||||
float_or_none, |
||||
parse_iso8601, |
||||
) |
||||
|
||||
|
||||
class ClypIE(InfoExtractor):
    """Extractor for audio clips hosted on clyp.it."""

    _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
    _TEST = {
        'url': 'https://clyp.it/ojz2wfah',
        'md5': '1d4961036c41247ecfdcc439c0cddcbb',
        'info_dict': {
            'id': 'ojz2wfah',
            'ext': 'mp3',
            'title': 'Krisson80 - bits wip wip',
            'description': '#Krisson80BitsWipWip #chiptune\n#wip',
            'duration': 263.21,
            'timestamp': 1443515251,
            'upload_date': '20150929',
        },
    }

    def _real_extract(self, url):
        """Fetch the clip metadata from the clyp.it JSON API and build the info dict."""
        audio_id = self._match_id(url)
        metadata = self._download_json(
            'https://api.clyp.it/%s' % audio_id, audio_id)

        # Stream URLs are looked up under "<format_id>Url"; the plain variants
        # are probed before the "Secure" ones, Ogg before Mp3.
        candidate_ids = [
            '%s%s' % (prefix, codec)
            for prefix in ('', 'Secure')
            for codec in ('Ogg', 'Mp3')]

        formats = []
        for candidate_id in candidate_ids:
            stream_url = metadata.get('%sUrl' % candidate_id)
            if not stream_url:
                # This variant is not offered for the clip.
                continue
            formats.append({
                'url': stream_url,
                'format_id': candidate_id,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        return {
            'id': audio_id,
            # 'Title' is mandatory; the remaining fields are optional.
            'title': metadata['Title'],
            'description': metadata.get('Description'),
            'duration': float_or_none(metadata.get('Duration')),
            'timestamp': parse_iso8601(metadata.get('DateCreated')),
            'formats': formats,
        }
@ -0,0 +1,88 @@ |
||||
# coding: utf-8 |
||||
from __future__ import unicode_literals |
||||
|
||||
import re |
||||
import os.path |
||||
|
||||
from .common import InfoExtractor |
||||
from ..compat import compat_urlparse |
||||
from ..utils import ( |
||||
url_basename, |
||||
remove_start, |
||||
) |
||||
|
||||
|
||||
class DemocracynowIE(InfoExtractor):
    """Extractor for democracynow.org show and story pages."""

    # The dot in the host name is escaped so that it only matches a literal
    # '.', not an arbitrary character.
    _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)'
    IE_NAME = 'democracynow'
    _TESTS = [{
        'url': 'http://www.democracynow.org/shows/2015/7/3',
        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': 'July 03, 2015 - Democracy Now!',
            'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
        },
    }, {
        'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
            'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
        },
    }]

    def _real_extract(self, url):
        """Parse the JSON blob embedded in the page into formats and subtitles.

        The video id is derived from the base name of the first media URL
        found (extension and a leading 'dn' removed); the URL-derived display
        id is used when the page lists no media.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        description = self._og_search_description(webpage)

        json_data = self._parse_json(self._search_regex(
            r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
            display_id)
        video_id = None
        formats = []

        default_lang = 'en'

        subtitles = {}

        def add_subtitle_item(lang, info_dict):
            # Group subtitle entries per language code.
            subtitles.setdefault(lang, []).append(info_dict)

        # chapter_file are not subtitles
        if 'caption_file' in json_data:
            add_subtitle_item(default_lang, {
                'url': compat_urlparse.urljoin(url, json_data['caption_file']),
            })

        for subtitle_item in json_data.get('captions', []):
            # Entries without a language fall back to the default one.
            lang = subtitle_item.get('language', '').lower() or default_lang
            add_subtitle_item(lang, {
                'url': compat_urlparse.urljoin(url, subtitle_item['url']),
            })

        for key in ('file', 'audio', 'video'):
            media_url = json_data.get(key, '')
            if not media_url:
                continue
            # Resolve against the page URL and drop any query string.
            media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
            video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
            formats.append({
                'url': media_url,
            })

        self._sort_formats(formats)

        return {
            'id': video_id or display_id,
            'title': json_data['title'],
            'description': description,
            'subtitles': subtitles,
            'formats': formats,
        }
@ -0,0 +1,51 @@ |
||||
# encoding: utf-8 |
||||
from __future__ import unicode_literals |
||||
|
||||
import time |
||||
|
||||
from .common import InfoExtractor |
||||
from ..utils import int_or_none |
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
    """Extractor for videos on www.dplay.se."""

    _VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'

    _TEST = {
        'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
        'info_dict': {
            'id': '3172',
            'ext': 'mp4',
            'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
            'title': 'Svensken lär sig njuta av livet',
            'duration': 2650,
        },
    }

    def _real_extract(self, url):
        """Resolve the numeric video id from the page, then fetch metadata and the HLS manifest."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        video_id = self._search_regex(
            r'data-video-id="(\d+)"', page, 'video id')

        videos_response = self._download_json(
            'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
            video_id)
        info = videos_response['data'][0]

        # The stream authorization request below is made with a 'dsc-geo'
        # cookie whose expiry is 20 minutes from now (presumably expressed in
        # milliseconds -- confirm against the site).
        geo_expiry = (time.time() + 20 * 60) * 1000
        geo_cookie = '{"countryCode":"NL","expiry":%d}' % geo_expiry
        self._set_cookie('secure.dplay.se', 'dsc-geo', geo_cookie)

        # TODO: consider adding support for 'stream_type=hds', it seems to
        # require setting some cookies
        stream_info = self._download_json(
            'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
            video_id, 'Getting manifest url for hls stream')
        manifest_url = stream_info['hls']
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': info['title'],
            'formats': formats,
            'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
        }
@ -1,39 +1,92 @@ |
||||
# encoding: utf-8 |
||||
from __future__ import unicode_literals |
||||
|
||||
import re |
||||
|
||||
from .common import InfoExtractor |
||||
from .brightcove import BrightcoveIE |
||||
from ..utils import ExtractorError |
||||
from ..utils import ( |
||||
float_or_none, |
||||
int_or_none, |
||||
parse_iso8601, |
||||
sanitized_Request, |
||||
) |
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
    """Extractor for eitb.tv videos, using the mam.eitb.eus REST service."""

    IE_NAME = 'eitb.tv'
    _VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/',
        'md5': 'edf4436247185adee3ea18ce64c47998',
        'info_dict': {
            'id': '4090227752001',
            'ext': 'mp4',
            'title': '60 minutos (Lasa y Zabala, 30 años)',
            'description': 'Programa de reportajes de actualidad.',
            'duration': 3996.76,
            'timestamp': 1381789200,
            'upload_date': '20131014',
            'tags': list,
        },
    }

    def _real_extract(self, url):
        """Collect HTTP renditions plus optional HLS/HDS formats for a video.

        The HLS and HDS lookups are best-effort (fatal=False): a failure
        there leaves the progressive HTTP renditions in place.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/%s/' % video_id,
            video_id, 'Downloading video JSON')

        media = video['web_media'][0]

        formats = []
        for rendition in media['RENDITIONS']:
            video_url = rendition.get('PMD_URL')
            if not video_url:
                continue
            tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
            format_id = 'http'
            if tbr:
                format_id += '-%d' % int(tbr)
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'width': int_or_none(rendition.get('FRAME_WIDTH')),
                'height': int_or_none(rendition.get('FRAME_HEIGHT')),
                'tbr': tbr,
            })

        hls_url = media.get('HLS_SURL')
        if hls_url:
            # The HLS manifest URL takes a token, requested with the page URL
            # as Referer and appended as the 'hdnts' query parameter.
            request = sanitized_Request(
                'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
                headers={'Referer': url})
            token_data = self._download_json(
                request, video_id, 'Downloading auth token', fatal=False)
            if token_data:
                token = token_data.get('token')
                if token:
                    m3u8_formats = self._extract_m3u8_formats(
                        '%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False)
                    if m3u8_formats:
                        formats.extend(m3u8_formats)

        hds_url = media.get('HDS_SURL')
        if hds_url:
            f4m_formats = self._extract_f4m_formats(
                '%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'),
                video_id, f4m_id='hds', fatal=False)
            if f4m_formats:
                formats.extend(f4m_formats)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
            'description': media.get('SHORT_DESC_ES') or video.get('desc_group') or media.get('SHORT_DESC_EU'),
            'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'),
            'duration': float_or_none(media.get('LENGTH'), 1000),
            'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '),
            'tags': media.get('TAGS'),
            'formats': formats,
        }
||||
|
@ -1,19 +1,62 @@ |
||||
from __future__ import unicode_literals |
||||
|
||||
from .mtv import MTVServicesInfoExtractor |
||||
from .common import InfoExtractor |
||||
from ..utils import ( |
||||
int_or_none, |
||||
parse_age_limit, |
||||
url_basename, |
||||
) |
||||
|
||||
|
||||
class GametrailersIE(InfoExtractor):
    """Extractor for gametrailers.com video pages (embed-player based)."""

    _VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'

    _TEST = {
        'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
        'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a',
        'info_dict': {
            'id': '2983958',
            'ext': 'mp4',
            'display_id': '116437-Just-Cause-3-Review',
            'title': 'Just Cause 3 - Review',
            'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?',
        },
    }

    def _real_extract(self, url):
        """Follow the page's embed player and read its ``embedVars`` JSON.

        The video id is the base name of the embed URL; formats are the
        media entries whose ``mediaPurpose`` is ``'play'``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        title = self._html_search_regex(
            r'<title>(.+?)\|', webpage, 'title').strip()
        embed_url = self._proto_relative_url(
            self._search_regex(
                r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage,
                'embed url'),
            scheme='http:')
        video_id = url_basename(embed_url)
        embed_page = self._download_webpage(embed_url, video_id)
        embed_vars_json = self._search_regex(
            r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page,
            'embed vars')
        info = self._parse_json(embed_vars_json, video_id)

        formats = []
        for media in info['media']:
            if media['mediaPurpose'] == 'play':
                formats.append({
                    'url': media['uri'],
                    'height': media['height'],
                    # The key must be exactly 'width'; a stray colon here
                    # would hide the value from format handling.
                    'width': media['width'],
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': info.get('thumbUri'),
            'description': self._og_search_description(webpage),
            'duration': int_or_none(info.get('videoLengthInSeconds')),
            'age_limit': parse_age_limit(info.get('audienceRating')),
        }
||||
|
@ -1,64 +1,169 @@ |
||||
# coding: utf-8 |
||||
from __future__ import unicode_literals |
||||
|
||||
import re |
||||
|
||||
from .common import InfoExtractor |
||||
from ..compat import compat_urlparse |
||||
from ..utils import ( |
||||
determine_ext, |
||||
int_or_none, |
||||
parse_duration, |
||||
parse_iso8601, |
||||
xpath_text, |
||||
) |
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
    """Extractor for media pages on mdr.de and kika.de."""

    IE_DESC = 'MDR.DE and KiKA'
    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'

    _TESTS = [{
        # MDR regularly deletes its videos
        'url': 'http://www.mdr.de/fakt/video189002.html',
        'only_matching': True,
    }, {
        # audio
        'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
        'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
        'info_dict': {
            'id': '1312272',
            'ext': 'mp3',
            'title': 'Feuilleton vom 30. Oktober 2015',
            'duration': 250,
            'uploader': 'MITTELDEUTSCHER RUNDFUNK',
        },
    }, {
        'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
        'md5': '4930515e36b06c111213e80d1e4aad0e',
        'info_dict': {
            'id': '19636',
            'ext': 'mp4',
            'title': 'Baumhaus vom 30. Oktober 2015',
            'duration': 134,
            'uploader': 'KIKA',
        },
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
        'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
        'info_dict': {
            'id': '8182',
            'ext': 'mp4',
            'title': 'Beutolomäus und der geheime Weihnachtswunsch',
            'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            'timestamp': 1419047100,
            'upload_date': '20141220',
            'duration': 4628,
            'uploader': 'KIKA',
        },
    }, {
        'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
        'only_matching': True,
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the avCustom XML referenced by the page and build the info dict.

        Every asset in the XML is probed for progressive, HDS-redirector and
        HLS-redirector URLs; each distinct URL is processed once.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        data_url = self._search_regex(
            r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
            webpage, 'data url', group='url')

        doc = self._download_xml(
            compat_urlparse.urljoin(url, data_url), video_id)

        title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)

        formats = []
        processed_urls = []
        for asset in doc.findall('./assets/asset'):
            for source in (
                    'progressiveDownload',
                    'dynamicHttpStreamingRedirector',
                    'adaptiveHttpStreamingRedirector'):
                url_el = asset.find('./%sUrl' % source)
                if url_el is None:
                    continue

                video_url = url_el.text
                if video_url in processed_urls:
                    continue

                processed_urls.append(video_url)

                vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
                abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)

                ext = determine_ext(video_url)
                if ext == 'm3u8':
                    url_formats = self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        preference=0, m3u8_id='HLS', fatal=False)
                elif ext == 'f4m':
                    url_formats = self._extract_f4m_formats(
                        video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
                        preference=0, f4m_id='HDS', fatal=False)
                else:
                    media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
                    filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))

                    # Only append a bitrate to the format id when the asset
                    # declares one; '%d' cannot format None.
                    bitrate = vbr or abr
                    if bitrate is None:
                        format_id = media_type
                    else:
                        format_id = '%s-%d' % (media_type, bitrate)

                    f = {
                        'url': video_url,
                        'format_id': format_id,
                        'filesize': filesize,
                        'abr': abr,
                        'preference': 1,
                    }

                    if vbr:
                        width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
                        height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
                        f.update({
                            'vbr': vbr,
                            'width': width,
                            'height': height,
                        })

                    url_formats = [f]

                if not url_formats:
                    continue

                if not vbr:
                    # No video bitrate on the asset: mark the formats as
                    # audio-only and move any total bitrate over to 'abr'.
                    for fmt in url_formats:
                        abr = fmt.get('tbr') or abr
                        if 'tbr' in fmt:
                            del fmt['tbr']
                        fmt.update({
                            'abr': abr,
                            'vcodec': 'none',
                        })

                formats.extend(url_formats)

        self._sort_formats(formats)

        description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
        timestamp = parse_iso8601(
            xpath_text(
                doc, [
                    './broadcast/broadcastDate',
                    './broadcast/broadcastStartDate',
                    './broadcast/broadcastEndDate'],
                'timestamp', default=None))
        duration = parse_duration(xpath_text(doc, './duration', 'duration'))
        uploader = xpath_text(doc, './rights', 'uploader')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'duration': duration,
            'uploader': uploader,
            'formats': formats,
        }
||||
|
@ -1,80 +1,40 @@ |
||||
# coding: utf-8 |
||||
from __future__ import unicode_literals |
||||
|
||||
import re |
||||
|
||||
from .common import InfoExtractor |
||||
from ..compat import ( |
||||
compat_str, |
||||
) |
||||
from ..utils import ( |
||||
ExtractorError, |
||||
clean_html, |
||||
) |
||||
from ..utils import sanitized_Request |
||||
|
||||
|
||||
class MovieClipsIE(InfoExtractor):
    """Extractor for movieclips.com video pages; playback is delegated to the embedded ThePlatform player."""

    # Dots in host names are escaped so they only match a literal '.'.
    _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5',
        'info_dict': {
            'id': 'pKIGmG83AqD9',
            'display_id': 'warcraft-trailer-1-561180739597',
            'ext': 'mp4',
            'title': 'Warcraft Trailer 1',
            'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
            'thumbnail': 're:^https?://.*\.jpg$',
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the page's ThePlatform player URL.

        Title and description are taken from the page itself; the remaining
        fields come from whichever extractor handles the player URL.
        """
        display_id = self._match_id(url)

        req = sanitized_Request(url)
        # it doesn't work if it thinks the browser it's too old
        req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
        webpage = self._download_webpage(req, display_id)
        theplatform_link = self._html_search_regex(
            r'src="(http://player\.theplatform\.com/p/.*?)"', webpage, 'theplatform link')
        # The '|' before the site name is a literal separator in the page
        # title; unescaped it would split the pattern into two alternatives,
        # the second of which has no capture group.
        title = self._html_search_regex(
            r'<title[^>]*>([^>]+)-\s*\d+\s*\|\s*Movieclips\.com</title>', webpage, 'title')
        description = self._html_search_meta('description', webpage)

        return {
            '_type': 'url_transparent',
            'url': theplatform_link,
            'title': title,
            'display_id': display_id,
            'description': description,
        }
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue