[funk] Fix extraction and rework extractors (closes #15792)

master
Sergey M․ 6 years ago
parent d91dd0ce19
commit 690404a6f8
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
  1. 5
      youtube_dl/extractor/extractors.py
  2. 101
      youtube_dl/extractor/funk.py

@ -385,7 +385,10 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE from .freespeech import FreespeechIE
from .freshlive import FreshLiveIE from .freshlive import FreshLiveIE
from .funimation import FunimationIE from .funimation import FunimationIE
from .funk import FunkIE from .funk import (
FunkMixIE,
FunkChannelIE,
)
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE from .fusion import FusionIE
from .fxnetworks import FXNetworksIE from .fxnetworks import FXNetworksIE

@ -1,43 +1,102 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from .nexx import NexxIE from .nexx import NexxIE
from ..utils import extract_attributes from ..utils import int_or_none
class FunkBaseIE(InfoExtractor):
    """Common base for the funk.net extractors in this module."""

    def _make_url_result(self, video):
        """Turn a funk.net video record into a ``url_transparent`` result.

        The returned dict points at a ``nexx:741:<sourceId>`` URL and names
        ``NexxIE`` as the extractor that should resolve it; the metadata
        read from *video* is carried alongside.

        :param video: mapping describing one video; must contain
            ``'sourceId'``. ``'title'``, ``'description'``, ``'duration'``,
            ``'seasonNr'`` and ``'episodeNr'`` are read with ``.get()``.
        :returns: info dict with ``'_type': 'url_transparent'``.
        :raises KeyError: if *video* has no ``'sourceId'`` entry.
        """
        source_id = video['sourceId']
        info = {
            '_type': 'url_transparent',
            # NOTE(review): 741 is presumably the Nexx domain id for
            # funk.net -- confirm against NexxIE.
            'url': 'nexx:741:%s' % source_id,
            'ie_key': NexxIE.ie_key(),
            'id': source_id,
            'title': video.get('title'),
            'description': video.get('description'),
        }
        # Numeric fields are passed through int_or_none before being stored.
        numeric_fields = (
            ('duration', 'duration'),
            ('season_number', 'seasonNr'),
            ('episode_number', 'episodeNr'),
        )
        for info_key, video_key in numeric_fields:
            info[info_key] = int_or_none(video.get(video_key))
        return info
class FunkMixIE(FunkBaseIE):
    """Extractor for ``funk.net/mix/<id>/<alias>`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
        'md5': '8edf617c2f2b7c9847dfda313f199009',
        'info_dict': {
            'id': '123748',
            'ext': 'mp4',
            'title': '"Die realste Kifferdoku aller Zeiten"',
            'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
            'timestamp': 1490274721,
            'upload_date': '20170323',
        },
    }]

    def _real_extract(self, url):
        """Resolve a mix URL to the matching video record.

        Fetches the curated lists from the funk.net API, selects the list
        whose ``entityId`` or ``alias`` equals the mix id taken from *url*,
        then selects the entry in that list's ``videoMetas`` whose ``alias``
        equals the alias taken from *url*. The entry's
        ``videoDataDelegate`` is handed to ``_make_url_result``.

        :raises StopIteration: if no list or no entry matches, since both
            lookups use ``next()`` without a default.
        """
        url_match = re.match(self._VALID_URL, url)
        mix_id = url_match.group('id')
        alias = url_match.group('alias')

        # The API call sends a fixed authorization token together with the
        # page URL as Referer.
        request_headers = {
            'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
            'Referer': url,
        }
        response = self._download_json(
            'https://www.funk.net/api/v3.1/curation/curatedLists/',
            mix_id, headers=request_headers, query={
                'size': 100,
            })
        curated_lists = response['result']['lists']

        # The id segment of the URL may match either the list's entity id
        # or its alias.
        matching_lists = (
            curated for curated in curated_lists
            if mix_id in (curated.get('entityId'), curated.get('alias')))
        metas = next(matching_lists)['videoMetas']

        matching_videos = (
            entry['videoDataDelegate']
            for entry in metas if entry.get('alias') == alias)
        video = next(matching_videos)

        return self._make_url_result(video)
class FunkIE(InfoExtractor): class FunkChannelIE(FunkBaseIE):
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)' _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/', 'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
'md5': '4d40974481fa3475f8bccfd20c5361f8',
'info_dict': { 'info_dict': {
'id': '716599', 'id': '1155821',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Neue Rechte Welle', 'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69', 'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
'timestamp': 1501337639, 'timestamp': 1514507395,
'upload_date': '20170729', 'upload_date': '20171229',
}, },
'params': { 'params': {
'format': 'bestvideo',
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/', 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
alias = mobj.group('alias')
webpage = self._download_webpage(url, video_id) results = self._download_json(
'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
headers={
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
'Referer': url,
}, query={
'channelId': channel_id,
'size': 100,
})['result']
domain_id = NexxIE._extract_domain_id(webpage) or '741' video = next(r for r in results if r.get('alias') == alias)
nexx_id = extract_attributes(self._search_regex(
r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
webpage, 'media player'))['data-id']
return self.url_result( return self._make_url_result(video)
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
video_id=nexx_id)

Loading…
Cancel
Save