Move common code for extractors based in MTV services to a new base class

Removes the duplication of the thumbnail extraction code (only MTVIE needs to override it)
master
Jaime Marquínez Ferrándiz 11 years ago
parent fb7abb31af
commit 84db81815a
  1. 10
      youtube_dl/extractor/comedycentral.py
  2. 16
      youtube_dl/extractor/gametrailers.py
  3. 69
      youtube_dl/extractor/mtv.py
  4. 13
      youtube_dl/extractor/southparkstudios.py

@ -1,7 +1,7 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .mtv import MTVIE, _media_xml_tag from .mtv import MTVServicesInfoExtractor
from ..utils import ( from ..utils import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
@ -11,7 +11,7 @@ from ..utils import (
) )
class ComedyCentralIE(MTVIE): class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' _VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
_FEED_URL = u'http://comedycentral.com/feeds/mrss/' _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE):
u'description': u'After a certain point, breastfeeding becomes c**kblocking.', u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
}, },
} }
# Overwrite MTVIE properties we don't want
_TESTS = []
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
return itemdoc.find(search_path).attrib['url']
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

@ -1,13 +1,11 @@
import re import re
from .mtv import MTVIE, _media_xml_tag from .mtv import MTVServicesInfoExtractor
class GametrailersIE(MTVIE):
""" class GametrailersIE(MTVServicesInfoExtractor):
Gametrailers use the same videos system as MTVIE, it just changes the feed
url, where the uri is and the method to get the thumbnails.
"""
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
_TEST = { _TEST = {
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4', u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
@ -17,15 +15,9 @@ class GametrailersIE(MTVIE):
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!', u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
}, },
} }
# Overwrite MTVIE properties we don't want
_TESTS = []
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss' _FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
return itemdoc.find(search_path).attrib['url']
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')

@ -10,35 +10,8 @@ from ..utils import (
def _media_xml_tag(tag): def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag return '{http://search.yahoo.com/mrss/}%s' % tag
class MTVIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
_TESTS = [
{
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
u'file': u'853555.mp4',
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
u'info_dict': {
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
u'info_dict': {
u'title': u'Everything Has Changed',
u'upload_date': u'20130606',
u'uploader': u'Taylor Swift',
},
u'skip': u'VEVO is only available in some countries',
},
]
class MTVServicesInfoExtractor(InfoExtractor):
@staticmethod @staticmethod
def _id_from_uri(uri): def _id_from_uri(uri):
return uri.split(':')[-1] return uri.split(':')[-1]
@ -53,7 +26,12 @@ class MTVIE(InfoExtractor):
return base + m.group('finalid') return base + m.group('finalid')
def _get_thumbnail_url(self, uri, itemdoc): def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
else:
return thumb_node.attrib['url']
def _extract_video_formats(self, metadataXml): def _extract_video_formats(self, metadataXml):
if '/error_country_block.swf' in metadataXml: if '/error_country_block.swf' in metadataXml:
@ -108,6 +86,39 @@ class MTVIE(InfoExtractor):
u'Downloading info') u'Downloading info')
return [self._get_video_info(item) for item in idoc.findall('.//item')] return [self._get_video_info(item) for item in idoc.findall('.//item')]
class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
_TESTS = [
{
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
u'file': u'853555.mp4',
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
u'info_dict': {
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
{
u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
u'info_dict': {
u'title': u'Everything Has Changed',
u'upload_date': u'20130606',
u'uploader': u'Taylor Swift',
},
u'skip': u'VEVO is only available in some countries',
},
]
def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')

@ -1,15 +1,14 @@
import re import re
from .mtv import MTVIE, _media_xml_tag from .mtv import MTVServicesInfoExtractor
class SouthParkStudiosIE(MTVIE): class SouthParkStudiosIE(MTVServicesInfoExtractor):
IE_NAME = u'southparkstudios.com' IE_NAME = u'southparkstudios.com'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
# Overwrite MTVIE properties we don't want
_TESTS = [{ _TESTS = [{
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE):
}, },
}] }]
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
else:
return thumb_node.attrib['url']
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
url = u'http://www.' + mobj.group(u'url') url = u'http://www.' + mobj.group(u'url')

Loading…
Cancel
Save