From 445d72b8b5cdce331833646ba856efe946253056 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 21 Feb 2016 16:41:24 +0800 Subject: [PATCH] [twitter:amplify] Add TwitterAmplifyIE for handling Twitter smart URLs Closes #8075 --- youtube_dl/extractor/__init__.py | 6 ++++- youtube_dl/extractor/twitter.py | 41 ++++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1edbfbd28..1ae606f1e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -798,7 +798,11 @@ from .twitch import ( TwitchBookmarksIE, TwitchStreamIE, ) -from .twitter import TwitterCardIE, TwitterIE +from .twitter import ( + TwitterCardIE, + TwitterIE, + TwitterAmplifyIE, +) from .ubu import UbuIE from .udemy import ( UdemyIE, diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index a161f046b..77c9176e4 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -14,7 +14,13 @@ from ..utils import ( ) -class TwitterCardIE(InfoExtractor): +class TwitterBaseIE(InfoExtractor): + def _get_vmap_video_url(self, vmap_url, video_id): + vmap_data = self._download_xml(vmap_url, video_id) + return xpath_text(vmap_data, './/MediaFile').strip() + + +class TwitterCardIE(TwitterBaseIE): IE_NAME = 'twitter:card' _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)' _TESTS = [ @@ -96,10 +102,8 @@ class TwitterCardIE(InfoExtractor): video_id) if 'playlist' not in config: if 'vmapUrl' in config: - vmap_data = self._download_xml(config['vmapUrl'], video_id) - video_url = xpath_text(vmap_data, './/MediaFile').strip() formats.append({ - 'url': video_url, + 'url': self._get_vmap_video_url(config['vmapUrl'], video_id), }) break # same video regardless of UA continue @@ -226,3 +230,32 @@ class TwitterIE(InfoExtractor): return info raise ExtractorError('There\'s not video in this tweet.') + + +class
TwitterAmplifyIE(TwitterBaseIE): + IE_NAME = 'twitter:amplify' + _VALID_URL = 'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})' + + _TEST = { + 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951', + 'md5': '7df102d0b9fd7066b86f3159f8e81bf6', + 'info_dict': { + 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951', + 'ext': 'mp4', + 'title': 'Twitter Video', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + vmap_url = self._html_search_meta( + 'twitter:amplify:vmap', webpage, 'vmap url') + video_url = self._get_vmap_video_url(vmap_url, video_id) + + return { + 'id': video_id, + 'title': 'Twitter Video', + 'url': video_url, + }