[scrippsnetworks:watch] Fix extraction (closes #14389)
parent
210a2720bc
commit
b21ab85088
1 changed file with 167 additions and 44 deletions
@ -1,60 +1,183 @@ |
||||
# coding: utf-8 |
||||
from __future__ import unicode_literals |
||||
|
||||
from .adobepass import AdobePassIE |
||||
import datetime |
||||
import json |
||||
import hashlib |
||||
import hmac |
||||
import re |
||||
|
||||
from .common import InfoExtractor |
||||
from .anvato import AnvatoIE |
||||
from ..utils import ( |
||||
int_or_none, |
||||
smuggle_url, |
||||
update_url_query, |
||||
urlencode_postdata, |
||||
xpath_text, |
||||
) |
||||
|
||||
|
||||
class ScrippsNetworksWatchIE(InfoExtractor):
    """Extractor for watch.<site>.com pages of the Scripps Networks brands.

    The video id taken from the URL is resolved to an ``mcpId`` through a
    SigV4-signed request to the SNI video API; the actual extraction is then
    delegated to the Anvato extractor.
    """

    IE_NAME = 'scrippsnetworks:watch'
    _VALID_URL = r'''(?x)
                    https?://
                        watch\.
                        (?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/
                        (?:
                            player\.[A-Z0-9]+\.html\#|
                            show/(?:[^/]+/){2}
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
        'md5': '26545fd676d939954c6808274bdb905a',
        'info_dict': {
            'id': '4173834',
            'ext': 'mp4',
            'title': 'Best Ever Treehouses',
            'description': "We're searching for the most over the top treehouses.",
            'uploader': 'ANV',
            'upload_date': '20170922',
            'timestamp': 1506056400,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [AnvatoIE.ie_key()],
    }, {
        'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
        'only_matching': True,
    }, {
        'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
        'only_matching': True,
    }]

    # Maps the ``site`` group of _VALID_URL to the brand segment used in the
    # SNI API path.
    _SNI_TABLE = {
        'hgtv': 'hgtv',
        'diynetwork': 'diy',
        'foodnetwork': 'food',
        'cookingchanneltv': 'cook',
        'travelchannel': 'trav',
        'geniuskitchen': 'geniuskitchen',
    }
    _SNI_HOST = 'web.api.video.snidigital.com'

    _AWS_REGION = 'us-east-1'
    _AWS_IDENTITY_ID_JSON = json.dumps({
        'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
    })
    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
    _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
    _AWS_SERVICE = 'execute-api'
    _AWS_REQUEST = 'aws4_request'
    _AWS_SIGNED_HEADERS = ';'.join([
        'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
    # Canonical request layout: method, URI, (empty) query string, the
    # canonical headers, a blank line, the signed header list and the payload
    # hash.  The lines must start at column 0 because the text is hashed
    # verbatim.
    _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
%(uri)s

host:%(host)s
x-amz-date:%(date)s
x-amz-security-token:%(token)s
x-api-key:%(key)s

%(signed_headers)s
%(payload_hash)s'''

    @staticmethod
    def _aws_sha256_hex(s):
        """Return the hex SHA-256 digest of the text s (encoded as UTF-8)."""
        return hashlib.sha256(s.encode('utf-8')).hexdigest()

    def _fetch_aws_credentials(self, url, video_id):
        """Obtain temporary AWS credentials for the SNI API.

        Requests an OpenID token from Cognito for the fixed identity and
        exchanges it at STS (AssumeRoleWithWebIdentity).

        Returns a tuple (access_key_id, secret_access_key, session_token).
        """
        token = self._download_json(
            'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION,
            video_id,
            data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
            headers={
                'Accept': '*/*',
                'Content-Type': 'application/x-amz-json-1.1',
                'Referer': url,
                'X-Amz-Content-Sha256': self._aws_sha256_hex(self._AWS_IDENTITY_ID_JSON),
                'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
                'X-Amz-User-Agent': self._AWS_USER_AGENT,
            })['Token']

        sts = self._download_xml(
            'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
                'Action': 'AssumeRoleWithWebIdentity',
                'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
                'RoleSessionName': 'web-identity',
                'Version': '2011-06-15',
                'WebIdentityToken': token,
            }), headers={
                'Referer': url,
                'X-Amz-User-Agent': self._AWS_USER_AGENT,
                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
            })

        def get(key):
            # Elements of the STS response live in the STS document namespace.
            return xpath_text(
                sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
                fatal=True)

        return get('AccessKeyId'), get('SecretAccessKey'), get('SessionToken')

    def _build_aws_auth_header(self, uri, datetime_now, access_key_id,
                               secret_access_key, session_token):
        """Build the SigV4 ``Authorization`` header value for a GET of uri.

        datetime_now is the request timestamp formatted as
        ``%Y%m%dT%H%M%SZ``; the same value must be sent as ``X-Amz-Date``.
        """
        date = datetime_now[:8]

        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
        canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
            'uri': uri,
            'host': self._SNI_HOST,
            'date': datetime_now,
            'token': session_token,
            'key': self._AWS_API_KEY,
            'signed_headers': self._AWS_SIGNED_HEADERS,
            # GET request: the payload is empty
            'payload_hash': self._aws_sha256_hex(''),
        }

        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
        credential_string = '/'.join(
            [date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
        string_to_sign = '\n'.join([
            'AWS4-HMAC-SHA256', datetime_now, credential_string,
            self._aws_sha256_hex(canonical_string)])

        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
        def aws_hmac(key, msg):
            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)

        # The signing key is derived by chaining HMACs over date, region,
        # service and the terminator string, each keyed by the previous digest.
        k_signing = ('AWS4' + secret_access_key).encode('utf-8')
        for part in (date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST):
            k_signing = aws_hmac(k_signing, part).digest()

        signature = aws_hmac(k_signing, string_to_sign).hexdigest()

        return ', '.join([
            'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
                [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]),
            'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
            'Signature=%s' % signature,
        ])

    def _real_extract(self, url):
        """Resolve url to an Anvato url_result via the signed SNI API."""
        mobj = re.match(self._VALID_URL, url)
        site_id, video_id = mobj.group('site', 'id')

        access_key_id, secret_access_key, session_token = (
            self._fetch_aws_credentials(url, video_id))

        uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
        datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')

        auth_header = self._build_aws_auth_header(
            uri, datetime_now, access_key_id, secret_access_key, session_token)

        mcp_id = self._download_json(
            'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
                'Accept': '*/*',
                'Referer': url,
                'Authorization': auth_header,
                'X-Amz-Date': datetime_now,
                'X-Amz-Security-Token': session_token,
                'X-Api-Key': self._AWS_API_KEY,
            })['results'][0]['mcpId']

        return self.url_result(
            'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
            AnvatoIE.ie_key(), video_id=mcp_id)
||||
|
Loading…
Reference in new issue