From 40b077bc7ee2722463f23974ce488acb7f11815a Mon Sep 17 00:00:00 2001 From: thc202 Date: Fri, 27 Feb 2015 22:27:30 +0000 Subject: [PATCH 1/2] [oppetarkiv] Add new extractor Some, if not all, of the videos appear to be geo-blocked (Sweden). Test might fail (403 Forbidden) if not run through a Swedish connection. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/oppetarkiv.py | 56 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 youtube_dl/extractor/oppetarkiv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index aecb67bf4..1544f1059 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -350,6 +350,7 @@ from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE from .ooyala import OoyalaIE from .openfilm import OpenFilmIE +from .oppetarkiv import OppetArkivIE from .orf import ( ORFTVthekIE, ORFOE1IE, diff --git a/youtube_dl/extractor/oppetarkiv.py b/youtube_dl/extractor/oppetarkiv.py new file mode 100644 index 000000000..6dd1fad3f --- /dev/null +++ b/youtube_dl/extractor/oppetarkiv.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, +) + + +class OppetArkivIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?oppetarkiv.se/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318', + 'md5': '7b95ca9bedeead63012b2d7c3992c28f', + 'info_dict': { + 'id': '1058509', + 'ext': 'mp4', + 'title': 'Farlig kryssning', + 'duration': 2566, + 'thumbnail': 're:^https?://.*[\.-]jpg$', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + info = self._download_json( + 'http://www.oppetarkiv.se/video/%s?output=json' % video_id, video_id) + + title = info['context']['title'] + thumbnail = info['context'].get('thumbnailImage') + + video_info = info['video'] +
formats = [] + for vr in video_info['videoReferences']: + vurl = vr['url'] + if determine_ext(vurl) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + vurl, video_id, + ext='mp4', entry_protocol='m3u8_native', + m3u8_id=vr.get('playerType'))) + else: + formats.append({ + 'format_id': vr.get('playerType'), + 'url': vurl, + }) + self._sort_formats(formats) + + duration = video_info.get('materialLength') + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'duration': duration, + } From e143f5dae9c767529b8b522a9df63ac0ee8fc356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Feb 2015 21:12:06 +0600 Subject: [PATCH 2/2] [oppetarkiv] Extract f4m formats and age limit --- youtube_dl/extractor/oppetarkiv.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/oppetarkiv.py b/youtube_dl/extractor/oppetarkiv.py index 6dd1fad3f..ae6a28308 100644 --- a/youtube_dl/extractor/oppetarkiv.py +++ b/youtube_dl/extractor/oppetarkiv.py @@ -8,17 +8,19 @@ from ..utils import ( class OppetArkivIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?oppetarkiv.se/video/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?oppetarkiv\.se/video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318', - 'md5': '7b95ca9bedeead63012b2d7c3992c28f', + 'md5': '5c1eb616e59f733d4af77edc5177d2fe', 'info_dict': { 'id': '1058509', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Farlig kryssning', 'duration': 2566, 'thumbnail': 're:^https?://.*[\.-]jpg$', + 'age_limit': 0, }, + 'skip': 'Only works from Sweden', } def _real_extract(self, url): @@ -33,11 +35,16 @@ class OppetArkivIE(InfoExtractor): formats = [] for vr in video_info['videoReferences']: vurl = vr['url'] - if determine_ext(vurl) == 'm3u8': + ext = determine_ext(vurl) + if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( vurl, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id=vr.get('playerType'))) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + vurl + '?hdcore=3.3.0', video_id, + f4m_id=vr.get('playerType'))) else: formats.append({ 'format_id': vr.get('playerType'), @@ -47,10 +54,13 @@ class OppetArkivIE(InfoExtractor): duration = video_info.get('materialLength') + age_limit = 18 if video_info.get('inappropriateForChildren') else 0 + return { 'id': video_id, 'title': title, 'formats': formats, 'thumbnail': thumbnail, 'duration': duration, + 'age_limit': age_limit, }