You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
108 lines
3.6 KiB
108 lines
3.6 KiB
# coding: utf-8 |
|
from __future__ import unicode_literals |
|
|
|
import re |
|
|
|
from .common import InfoExtractor |
|
from ..utils import ( |
|
determine_ext, |
|
ExtractorError, |
|
get_element_by_id, |
|
PhantomJSwrapper, |
|
) |
|
|
|
|
|
class OpenloadIE(InfoExtractor):
    """Extractor for videos hosted on openload.co, openload.io and oload.tv.

    Accepts both the ``/f/<id>`` and ``/embed/<id>`` URL forms (see
    ``_VALID_URL``); extraction itself always goes through the
    ``openload.co`` embed page.
    """

    _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'

    _TESTS = [{
        'url': 'https://openload.co/f/kUEfGclsU9o',
        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
        'info_dict': {
            'id': 'kUEfGclsU9o',
            'ext': 'mp4',
            'title': 'skyrim_no-audio_1080.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://openload.co/embed/rjC09fkPLYs',
        'info_dict': {
            'id': 'rjC09fkPLYs',
            'ext': 'mp4',
            'title': 'movie.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': {
                'en': [{
                    'ext': 'vtt',
                }],
            },
        },
        'params': {
            'skip_download': True,  # test subtitles only
        },
    }, {
        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
        'only_matching': True,
    }, {
        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
        'only_matching': True,
    }, {
        'url': 'https://openload.co/f/_-ztPaZtMhM/',
        'only_matching': True,
    }, {
        # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
        # for title and ext
        'url': 'https://openload.co/embed/Sxz5sADo82g/',
        'only_matching': True,
    }, {
        'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
        'only_matching': True,
    }]

    # Sent with the page request, handed to the PhantomJS wrapper, and
    # returned as 'http_headers' so the same User-Agent accompanies the
    # extracted URL.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'

    @staticmethod
    def _extract_urls(webpage):
        """Return all Openload embed URLs found in ``<iframe src=...>`` tags.

        ``webpage`` is the HTML text to scan.  The result is the list
        produced by ``re.findall``: one string per match, empty when the
        page embeds no Openload player.  The scheme is optional in the
        pattern, so returned URLs may lack ``http(s)://``.
        """
        return re.findall(
            r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)',
            webpage)

    def _real_extract(self, url):
        """Build the info dict for the Openload video referenced by ``url``.

        Raises ``ExtractorError`` when the page reports the file as missing
        or deleted, and when no stream id can be read from the processed
        page.
        """
        video_id = self._match_id(url)
        # Normalise every accepted URL form to the openload.co embed page.
        url = 'https://openload.co/embed/%s/' % video_id
        headers = {
            'User-Agent': self._USER_AGENT,
        }

        webpage = self._download_webpage(url, video_id, headers=headers)

        if 'File not found' in webpage or 'deleted by the owner' in webpage:
            raise ExtractorError('File not found', expected=True, video_id=video_id)

        # NOTE(review): presumably the page's JavaScript fills in the
        # 'streamurl' element, which is why the downloaded HTML is passed
        # through PhantomJS before being parsed - confirm against
        # PhantomJSwrapper.
        phantom = PhantomJSwrapper(self, required_version='2.0')
        webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)

        decoded_id = get_element_by_id('streamurl', webpage)
        # Without this guard a missing or empty element would be formatted
        # into a bogus '.../stream/None?mime=true' URL and only fail later,
        # at download time, with a misleading error.
        if not decoded_id:
            raise ExtractorError(
                'Unable to extract stream URL id', video_id=video_id)

        video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id

        # Title sources, in order of preference: OpenGraph title, the
        # <span class="title"> element, then the description meta tag
        # (fatal, so a page with none of them aborts extraction).
        title = (
            self._og_search_title(webpage, default=None)
            or self._search_regex(
                r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
                'title', default=None)
            or self._html_search_meta(
                'description', webpage, 'title', fatal=True))

        # Only the first parsed HTML5 media entry is consulted, for its
        # subtitles and thumbnail.
        entries = self._parse_html5_media_entries(url, webpage, video_id)
        entry = entries[0] if entries else {}
        subtitles = entry.get('subtitles')

        info_dict = {
            'id': video_id,
            'title': title,
            'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
            'url': video_url,
            # Seems all videos have extensions in their titles
            'ext': determine_ext(title, 'mp4'),
            'subtitles': subtitles,
            'http_headers': headers,
        }
        return info_dict
|
|
|