InstagramIE: fix the extraction of the uploader_id and the title

The page title is now just 'Instagram', so we build the title from the uploader id instead ('Video by <uploader_id>').
Also extract the description.
master
Jaime Marquínez Ferrándiz 11 years ago
parent f631c3311a
commit 3f40217704
  1. 22
      youtube_dl/extractor/instagram.py

@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
u'md5': u'0d2da106a9d2631273e192b372806516',
u'info_dict': {
u"uploader_id": u"naomipq",
u"title": u"Video by naomipq"
u"title": u"Video by naomipq",
u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
}
}
@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
html_title = self._html_search_regex(
r'<title>(.+?)</title>',
webpage, u'title', flags=re.DOTALL)
title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
uploader_id = self._html_search_regex(
r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
webpage, u'uploader id', fatal=False, flags=re.DOTALL)
ext = 'mp4'
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
webpage, u'uploader id', fatal=False)
desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
fatal=False)
return [{
'id': video_id,
'url': self._og_search_video_url(webpage),
'ext': ext,
'title': title,
'ext': 'mp4',
'title': u'Video by %s' % uploader_id,
'thumbnail': self._og_search_thumbnail(webpage),
'uploader_id' : uploader_id
'uploader_id' : uploader_id,
'description': desc,
}]

Loading…
Cancel
Save