[niconico] Simplify and make work with old Python versions

The website requires SSLv3, otherwise it just times out during SSL negotiation.
Branch: master
Author: Philipp Hagemeister, 11 years ago
Parent: 4c9c57428f
Commit: 13ebea791f

Changed files:
  1. youtube_dl/extractor/niconico.py (121 lines changed)
  2. youtube_dl/utils.py (29 lines changed)
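
The SSLv3 requirement mentioned in the commit message is what the utils.py hunk below works around. As a minimal sketch of the idea, stripped of the urllib plumbing (this assumes a 2013-era Python/OpenSSL build that still ships ssl.PROTOCOL_SSLv3, and the host name is only an example):

    # Sketch only, not part of the commit: pin the TLS handshake to SSLv3,
    # which is what the HTTPSConnectionV3.connect() override below does.
    import socket
    import ssl

    sock = socket.create_connection(('secure.nicovideo.jp', 443), timeout=10)
    ssl_sock = ssl.wrap_socket(sock, ssl_version=ssl.PROTOCOL_SSLv3)
    print(ssl_sock.cipher())  # handshake succeeded; show the negotiated cipher
    ssl_sock.close()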

--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -17,6 +17,7 @@ from ..utils import (
     unified_strdate,
 )
 
+
 class NiconicoIE(InfoExtractor):
     IE_NAME = u'niconico'
     IE_DESC = u'ニコニコ動画'
@@ -38,8 +39,7 @@ class NiconicoIE(InfoExtractor):
         },
     }
 
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
-    _LOGIN_URL = 'https://secure.nicovideo.jp/secure/login'
+    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
     _NETRC_MACHINE = 'niconico'
     # If True it will raise an error if no login info is provided
     _LOGIN_REQUIRED = True
@@ -57,99 +57,63 @@ class NiconicoIE(InfoExtractor):
         # Log in
         login_form_strs = {
             u'mail': username,
             u'password': password,
         }
         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
         # chokes on unicode
         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
-        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
-        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
-        try:
-            self.report_login()
-            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
-            if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
-                self._downloader.report_warning(u'unable to log in: bad username or password')
-                return False
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+        request = compat_urllib_request.Request(
+            u'https://secure.nicovideo.jp/secure/login', login_data)
+        login_results = self._download_webpage(
+            request, u'', note=u'Logging in', errnote=u'Unable to log in')
+        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
+            self._downloader.report_warning(u'unable to log in: bad username or password')
             return False
         return True

     def _real_extract(self, url):
-        video_id = self._extract_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)

         # Get video webpage
-        self.report_video_webpage_download(video_id)
-        url = 'http://www.nicovideo.jp/watch/' + video_id
-        request = compat_urllib_request.Request(url)
-        try:
-            video_webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
+        video_webpage = self._download_webpage(
+            'http://www.nicovideo.jp/watch/' + video_id, video_id)

-        # Get video info
-        self.report_video_info_webpage_download(video_id)
-        url = 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id
-        request = compat_urllib_request.Request(url)
-        try:
-            video_info_webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video info webpage: %s' % compat_str(err))
+        video_info_webpage = self._download_webpage(
+            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
+            note=u'Downloading video info page')

         # Get flv info
-        self.report_flv_info_webpage_download(video_id)
-        url = 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id
-        request = compat_urllib_request.Request(url)
-        try:
-            flv_info_webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download flv info webpage: %s' % compat_str(err))
+        flv_info_webpage = self._download_webpage(
+            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+            video_id, u'Downloading flv info')
+        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]

         # Start extracting information
-        self.report_information_extraction(video_id)
         video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
-
-        # url
-        video_real_url = compat_urlparse.parse_qs(flv_info_webpage.decode('utf-8'))['url'][0]
-
-        # title
         video_title = video_info.find('.//title').text
-
-        # ext
         video_extension = video_info.find('.//movie_type').text
-
-        # format
         video_format = video_extension.upper()
-
-        # thumbnail
         video_thumbnail = video_info.find('.//thumbnail_url').text
-
-        # description
         video_description = video_info.find('.//description').text
-
-        # uploader_id
         video_uploader_id = video_info.find('.//user_id').text
+        video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
+        video_view_count = video_info.find('.//view_counter').text
+        video_webpage_url = video_info.find('.//watch_url').text

         # uploader
+        video_uploader = video_uploader_id
         url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
-        request = compat_urllib_request.Request(url)
         try:
-            user_info_webpage = compat_urllib_request.urlopen(request).read()
+            user_info_webpage = self._download_webpage(
+                url, video_id, note=u'Downloading user information')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
-        user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
-        video_uploader = user_info.find('.//nickname').text
-
-        # uploder_date
-        video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
-
-        # view_count
-        video_view_count = video_info.find('.//view_counter').text
-
-        # webpage_url
-        video_webpage_url = video_info.find('.//watch_url').text
+        else:
+            user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
+            video_uploader = user_info.find('.//nickname').text

         return {
             'id': video_id,
@@ -165,26 +129,3 @@ class NiconicoIE(InfoExtractor):
             'view_count': video_view_count,
             'webpage_url': video_webpage_url,
         }
-
-    def _extract_id(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        video_id = mobj.group(1)
-        return video_id
-
-    def report_video_webpage_download(self, video_id):
-        """Report attempt to download video webpage."""
-        self.to_screen(u'%s: Downloading video webpage' % video_id)
-
-    def report_video_info_webpage_download(self, video_id):
-        """Report attempt to download video info webpage."""
-        self.to_screen(u'%s: Downloading video info webpage' % video_id)
-
-    def report_flv_info_webpage_download(self, video_id):
-        """Report attempt to download flv info webpage."""
-        self.to_screen(u'%s: Downloading flv info webpage' % video_id)
-
-    def report_information_extraction(self, video_id):
-        """Report attempt to extract video information."""
-        self.to_screen(u'%s: Extracting video information' % video_id)
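
The rewritten flv step above feeds the getflv response straight into compat_urlparse.parse_qs and takes the first 'url' value. A small illustration of that parsing, using a fabricated response string (the field names are only an assumption about the API's shape):

    # Illustration only: parse a made-up getflv-style response the same way the
    # new extractor code does. parse_qs returns a dict of lists, hence ['url'][0].
    try:
        from urllib.parse import parse_qs  # Python 3
    except ImportError:
        from urlparse import parse_qs      # Python 2 (compat_urlparse in youtube_dl)

    sample = 'thread_id=1234567890&url=http%3A%2F%2Fexample.invalid%2Fsmile%3Fv%3D12345.67890&done=true'
    print(parse_qs(sample)['url'][0])  # -> http://example.invalid/smile?v=12345.67890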

--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -12,6 +12,7 @@ import os
 import pipes
 import platform
 import re
+import ssl
 import socket
 import sys
 import traceback
@@ -535,13 +536,31 @@ def formatSeconds(secs):
     else:
         return '%d' % secs
 
 def make_HTTPS_handler(opts):
-    if sys.version_info < (3,2):
-        # Python's 2.x handler is very simplistic
-        return compat_urllib_request.HTTPSHandler()
+    if sys.version_info < (3, 2):
+        import httplib
+
+        class HTTPSConnectionV3(httplib.HTTPSConnection):
+            def __init__(self, *args, **kwargs):
+                httplib.HTTPSConnection.__init__(self, *args, **kwargs)
+
+            def connect(self):
+                sock = socket.create_connection((self.host, self.port), self.timeout)
+                if self._tunnel_host:
+                    self.sock = sock
+                    self._tunnel()
+                try:
+                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
+                except ssl.SSLError as e:
+                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
+
+        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
+            def https_open(self, req):
+                return self.do_open(HTTPSConnectionV3, req)
+        return HTTPSHandlerV3()
     else:
-        import ssl
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
         context.set_default_verify_paths()
         context.verify_mode = (ssl.CERT_NONE
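
How the returned handler is wired up is outside this diff; as an assumption about the surrounding youtube-dl code rather than something shown here, it would be installed into the opener that carries all HTTP(S) traffic, roughly like this:

    # Hypothetical wiring, not part of this commit: build an opener around the
    # SSLv3-capable handler so every HTTPS request goes through it.
    from youtube_dl.utils import compat_urllib_request, make_HTTPS_handler

    opts = None  # stands in for the parsed command-line options the real code passes
    opener = compat_urllib_request.build_opener(make_HTTPS_handler(opts))
    compat_urllib_request.install_opener(opener)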
