Initial clone based on d65d89183f645a0e95910c3861491a75c26000eb upstream which is...
[youtube-dl.git] / youtube_dl / extractor / expotv.py
blob95a8977821d3c292470e42f0f9170674ed9a6aa2
1 from __future__ import unicode_literals
3 from .common import InfoExtractor
4 from ..utils import (
5 int_or_none,
6 unified_strdate,
class ExpoTVIE(InfoExtractor):
    """Information extractor for video pages on expotv.com."""

    # The numeric video id is the last path component of the URL; it may be
    # followed only by the end of the string, a query string or a fragment.
    _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
    # Sample URL together with the metadata expected for it.
    _TEST = {
        'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
        'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
        'info_dict': {
            'id': '667916',
            'ext': 'mp4',
            'title': 'NYX Butter Lipstick Little Susie',
            'description': 'Goes on like butter, but looks better!',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Stephanie S.',
            'upload_date': '20150520',
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        """Extract formats and metadata for the video page at ``url``.

        The page is downloaded to obtain the player key, which is combined
        with the video id to fetch a JSON configuration listing the media
        sources. Title, description, play count, uploader and review date
        are scraped from the page itself; the thumbnail comes from the
        configuration.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``description``, ``view_count``, ``thumbnail``, ``uploader`` and
        ``upload_date``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        # The player key is required to build the configuration URL, so this
        # lookup is left with the default (fatal) behaviour.
        player_key = self._search_regex(
            r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
        config = self._download_json(
            'http://client.expotv.com/video/config/%s/%s' % (video_id, player_key),
            video_id, 'Downloading video configuration')

        formats = []
        for fcfg in config['sources']:
            media_url = fcfg.get('file')
            if not media_url:
                # A source entry without a file URL cannot be downloaded.
                continue
            if fcfg.get('type') == 'm3u8':
                # HLS manifest: it may expand to several formats.
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
            else:
                formats.append({
                    'url': media_url,
                    'height': int_or_none(fcfg.get('height')),
                    'format_id': fcfg.get('label'),
                    # Prefer the extension found in the URL's "filename="
                    # parameter; fall back to the declared source type.
                    'ext': self._search_regex(
                        r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
                        'file extension', default=None) or fcfg.get('type'),
                })
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = config.get('image')
        # The play counter is optional metadata: look it up non-fatally, like
        # the uploader and upload date below, so that a page without the
        # counter does not abort the whole extraction.
        # NOTE(review): relies on int_or_none passing None through - confirm.
        view_count = int_or_none(self._search_regex(
            r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts',
            fatal=False))
        uploader = self._search_regex(
            r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
            fatal=False)
        # day_first=False is passed explicitly for the scraped date.
        # NOTE(review): presumably the site shows month-first dates - confirm.
        upload_date = unified_strdate(self._search_regex(
            r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
            fatal=False), day_first=False)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
        }