mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-03-03 12:49:37 +00:00
Compare commits
4 Commits
2026.02.21
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bf4dfffe01 | ||
|
|
6f796a2bff | ||
|
|
e3118604aa | ||
|
|
338dbebdb8 |
@@ -672,10 +672,6 @@ from .frontendmasters import (
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
)
|
||||
from .frontro import (
|
||||
TheChosenGroupIE,
|
||||
TheChosenIE,
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funk import FunkIE
|
||||
from .funker530 import Funker530IE
|
||||
@@ -2063,6 +2059,10 @@ from .tenplay import (
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .tfo import TFOIE
|
||||
from .thechosen import (
|
||||
TheChosenGroupIE,
|
||||
TheChosenIE,
|
||||
)
|
||||
from .theguardian import (
|
||||
TheGuardianPodcastIE,
|
||||
TheGuardianPodcastPlaylistIE,
|
||||
|
||||
@@ -91,8 +91,8 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
if filter_key == 'canonical':
|
||||
webpage = self._download_webpage(url, filter_value)
|
||||
graphql_video_id = self._search_regex(
|
||||
r'<meta\b[^>]+\bcontent="[^"]*\btpid/(\d+)"', webpage,
|
||||
'id') or self._html_search_meta('videoId', webpage, 'GraphQL video ID', fatal=True)
|
||||
r'<meta\b[^>]+\bcontent="[^"]*\btpid/(\d+)"', webpage, 'id',
|
||||
default=None) or self._html_search_meta('videoId', webpage, 'GraphQL video ID', fatal=True)
|
||||
else:
|
||||
graphql_video_id = filter_value
|
||||
|
||||
|
||||
@@ -99,66 +99,3 @@ class FrontroGroupBaseIE(FrontoBaseIE):
|
||||
'modified_timestamp': ('updatedAt', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class TheChosenIE(FrontroVideoBaseIE):
|
||||
_CHANNEL_ID = '12884901895'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/watch/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.thechosen.tv/watch/184683594325',
|
||||
'md5': '3f878b689588c71b38ec9943c54ff5b0',
|
||||
'info_dict': {
|
||||
'id': '184683594325',
|
||||
'ext': 'mp4',
|
||||
'title': 'Season 3 Episode 2: Two by Two',
|
||||
'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 4212,
|
||||
'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/',
|
||||
'timestamp': 1698954546,
|
||||
'upload_date': '20231102',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.thechosen.tv/watch/184683596189',
|
||||
'md5': 'd581562f9d29ce82f5b7770415334151',
|
||||
'info_dict': {
|
||||
'id': '184683596189',
|
||||
'ext': 'mp4',
|
||||
'title': 'Season 4 Episode 8: Humble',
|
||||
'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 5092,
|
||||
'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/',
|
||||
'timestamp': 1715019474,
|
||||
'upload_date': '20240506',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class TheChosenGroupIE(FrontroGroupBaseIE):
|
||||
_CHANNEL_ID = '12884901895'
|
||||
_VIDEO_EXTRACTOR = TheChosenIE
|
||||
_VIDEO_URL_TMPL = 'https://watch.thechosen.tv/watch/%s'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.thechosen.tv/group/309237658592',
|
||||
'info_dict': {
|
||||
'id': '309237658592',
|
||||
'title': 'Season 3',
|
||||
'timestamp': 1746203969,
|
||||
'upload_date': '20250502',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}]
|
||||
|
||||
@@ -25,6 +25,7 @@ from ..utils.traversal import (
|
||||
find_elements,
|
||||
require,
|
||||
traverse_obj,
|
||||
trim_str,
|
||||
value,
|
||||
)
|
||||
|
||||
@@ -32,16 +33,15 @@ from ..utils.traversal import (
|
||||
class PatreonBaseIE(InfoExtractor):
|
||||
@functools.cached_property
|
||||
def patreon_user_agent(self):
|
||||
# Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection.
|
||||
# Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in
|
||||
# Patreon mobile UA yields higher res m3u8 for locked posts, but gives 401 if not logged-in
|
||||
if self._get_cookies('https://www.patreon.com/').get('session_id'):
|
||||
return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)'
|
||||
return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
|
||||
return 'Patreon/126.9.0.15 (Android; Android 14; Scale/2.10)'
|
||||
return None
|
||||
|
||||
def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
|
||||
if headers is None:
|
||||
headers = {}
|
||||
if 'User-Agent' not in headers:
|
||||
if 'User-Agent' not in headers and self.patreon_user_agent:
|
||||
headers['User-Agent'] = self.patreon_user_agent
|
||||
if query:
|
||||
query.update({'json-api-version': 1.0})
|
||||
@@ -50,7 +50,9 @@ class PatreonBaseIE(InfoExtractor):
|
||||
return self._download_json(
|
||||
f'https://www.patreon.com/api/{ep}',
|
||||
item_id, note=note if note else 'Downloading API JSON',
|
||||
query=query, fatal=fatal, headers=headers)
|
||||
query=query, fatal=fatal, headers=headers,
|
||||
# If not using Patreon mobile UA, we need impersonation due to Cloudflare
|
||||
impersonate=not self.patreon_user_agent)
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.response.headers.get('Content-Type')) != 'json':
|
||||
raise
|
||||
@@ -623,14 +625,13 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
'info_dict': {
|
||||
'id': '9631148',
|
||||
'title': 'Anything Else?',
|
||||
'description': 'md5:2ee1db4aed2f9460c2b295825a24aa08',
|
||||
'description': 'md5:b2f20eec4cb5520d9a4be4971f28add5',
|
||||
'uploader': 'dan ',
|
||||
'uploader_id': '13852412',
|
||||
'uploader_url': 'https://www.patreon.com/anythingelse',
|
||||
'channel': 'Anything Else?',
|
||||
'channel_id': '9631148',
|
||||
'channel_url': 'https://www.patreon.com/anythingelse',
|
||||
'channel_follower_count': int,
|
||||
'age_limit': 0,
|
||||
'thumbnail': r're:https?://.+/.+',
|
||||
},
|
||||
@@ -675,16 +676,15 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
|
||||
if campaign_id is None:
|
||||
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
|
||||
campaign_id = traverse_obj(self._search_nextjs_data(webpage, vanity, default=None), (
|
||||
'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
|
||||
if not campaign_id:
|
||||
campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
|
||||
((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'),
|
||||
'id', {str}, any, {require('campaign ID')}))
|
||||
results = self._call_api('search', vanity, query={
|
||||
'q': vanity,
|
||||
'page[size]': '5',
|
||||
})['data']
|
||||
campaign_id = traverse_obj(results, (
|
||||
lambda _, v: v['type'] == 'campaign-document' and v['attributes']['url'].lower().endswith(f'/{vanity.lower()}'),
|
||||
'id', {trim_str(start='campaign_')}, filter, any, {require('campaign ID')}))
|
||||
|
||||
params = {
|
||||
'json-api-use-default-includes': 'false',
|
||||
|
||||
118
yt_dlp/extractor/thechosen.py
Normal file
118
yt_dlp/extractor/thechosen.py
Normal file
@@ -0,0 +1,118 @@
|
||||
from .common import InfoExtractor
|
||||
from .frontro import FrontroGroupBaseIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class TheChosenIE(InfoExtractor):
    # Single-video extractor for watch.thechosen.tv; accepts both the
    # /video/<id> and /watch/<id> URL forms.
    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/(?:video|watch)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://watch.thechosen.tv/video/184683594325',
        'md5': '3f878b689588c71b38ec9943c54ff5b0',
        'info_dict': {
            'id': '184683594325',
            'ext': 'mp4',
            'title': 'Season 3 Episode 2: Two by Two',
            'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
            'duration': 4212,
            'thumbnail': 'https://cas.global.ssl.fastly.net/hls-10-4/184683594325/thumbnail.png',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://watch.thechosen.tv/video/184683596189',
        'md5': 'd581562f9d29ce82f5b7770415334151',
        'info_dict': {
            'id': '184683596189',
            'ext': 'mp4',
            'title': 'Season 4 Episode 8: Humble',
            'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
            'duration': 5092,
            'thumbnail': 'https://cdn.thechosen.media/videos/cmkvu7nn500nhfm0wpgmm6180/thumbnail.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://watch.thechosen.tv/video/184683621748',
        'info_dict': {
            'id': '184683621748',
            'ext': 'mp4',
            'title': 'Season 5 Episode 2: House of Cards',
            'description': 'md5:55b389cbb4b7a01d8c2d837102905617',
            'duration': 3086,
            'thumbnail': 'https://cdn.thechosen.media/videos/cmkolt4el000afd5zd6x0aeph/thumbnail.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://watch.thechosen.tv/video/184683621750',
        'info_dict': {
            'id': '184683621750',
            'ext': 'mp4',
            'title': 'Season 5 Episode 3: Woes',
            'description': 'md5:90ca3cc41316a965fd1cd3d5b3458784',
            'duration': 3519,
            'thumbnail': 'https://cdn.thechosen.media/videos/cmkoltsl8000dfd5z3luid3mg/thumbnail.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        # The numeric ID from the URL is also the key for the JSON API.
        video_id = self._match_id(url)
        metadata = self._download_json(
            f'https://api.watch.thechosen.tv/v1/videos/{video_id}', video_id)

        # Every entry under details.video may carry a manifest URL; the
        # manifest type is decided purely by the URL's extension.
        manifest_urls = traverse_obj(
            metadata, ('details', 'video', ..., 'url', {url_or_none}))

        formats, subtitles = [], {}
        for manifest_url in manifest_urls:
            ext = determine_ext(manifest_url)
            if ext not in ('m3u8', 'mpd'):
                # Neither HLS nor DASH: warn and move on to the next URL
                self.report_warning(f'Skipping unsupported format extension "{ext}"', video_id=video_id)
                continue
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    manifest_url, video_id, 'mp4', fatal=False)
            else:
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    manifest_url, video_id, fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        # Thumbnails are read from the 'thumbs' and 'thumbnails' mappings
        # (name -> URL); pairs whose value is not a valid URL are dropped.
        thumbnails = [
            {
                'id': thumb_id,
                'url': thumb_url,
            }
            for thumb_id, thumb_url in traverse_obj(metadata, (
                ('thumbs', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]),
            ))
        ]

        basic_info = traverse_obj(metadata, ({
            'title': ('title', {str}),
            'description': ('description', {str}),
            'duration': ('duration', {int_or_none}),
        }))

        return {
            'id': video_id,
            **basic_info,
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class TheChosenGroupIE(FrontroGroupBaseIE):
    # Playlist ("group") extractor for watch.thechosen.tv, built on the
    # shared Frontro group base class.
    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'

    # Flagged as not working.
    # NOTE(review): presumably because the site moved off the Frontro
    # backend that FrontroGroupBaseIE targets - confirm before re-enabling.
    _WORKING = False

    # Configuration consumed by FrontroGroupBaseIE
    _CHANNEL_ID = '12884901895'
    _VIDEO_EXTRACTOR = TheChosenIE
    _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/watch/%s'

    _TESTS = [{
        'url': 'https://watch.thechosen.tv/group/309237658592',
        'info_dict': {
            'id': '309237658592',
            'title': 'Season 3',
            'timestamp': 1746203969,
            'upload_date': '20250502',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_count': 8,
    }]
|
||||
@@ -1,110 +1,205 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
find_elements,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class ZapiksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"']
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?zapiks\.(?:com|fr)/(?P<id>[\w-]+)\.html',
|
||||
r'https?://(?:www\.)?zapiks\.fr/index\.php\?(?:[^#]+&)?media_id=(?P<id>\d+)',
|
||||
]
|
||||
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?zapiks\.fr/index\.php\?(?:[^#"\']+&(?:amp;)?)?media_id=\d+)']
|
||||
_TESTS = [{
|
||||
'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
|
||||
'url': 'https://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
|
||||
'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
|
||||
'info_dict': {
|
||||
'id': '80798',
|
||||
'ext': 'mp4',
|
||||
'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
|
||||
'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
|
||||
'thumbnail': r're:https?://zpks\.com/.+\.jpg',
|
||||
'description': 'md5:db07a553c1550e2905bceafa923000fd',
|
||||
'display_id': 'ep2s3-bon-appetit-eh-be-viva',
|
||||
'duration': 528,
|
||||
'tags': 'count:5',
|
||||
'thumbnail': r're:https?://zpks\.com/.+',
|
||||
'timestamp': 1359044972,
|
||||
'upload_date': '20130124',
|
||||
'uploader': 'BonAppetit',
|
||||
'uploader_id': 'bonappetit',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
|
||||
'url': 'https://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
|
||||
'md5': '196fe42901639d868956b1dcaa48de15',
|
||||
'info_dict': {
|
||||
'id': '118046',
|
||||
'ext': 'mp4',
|
||||
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||
'thumbnail': r're:https?://zpks\.com/.+\.jpg',
|
||||
'display_id': 'ep3s5-bon-appetit-baqueira-m-1',
|
||||
'duration': 642,
|
||||
'tags': 'count:8',
|
||||
'thumbnail': r're:https?://zpks\.com/.+',
|
||||
'timestamp': 1424370543,
|
||||
'upload_date': '20150219',
|
||||
'uploader': 'BonAppetit',
|
||||
'uploader_id': 'bonappetit',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zapiks.fr/index.php?action=playerIframe&media_id=164049',
|
||||
'md5': 'fb81a7c9b7b84c00ba111028aee593b8',
|
||||
'info_dict': {
|
||||
'id': '164049',
|
||||
'ext': 'mp4',
|
||||
'title': 'Courchevel Hiver 2025/2026',
|
||||
'display_id': 'courchevel-hiver-2025-2026',
|
||||
'duration': 38,
|
||||
'tags': 'count:1',
|
||||
'thumbnail': r're:https?://zpks\.com/.+',
|
||||
'timestamp': 1769019147,
|
||||
'upload_date': '20260121',
|
||||
'uploader': 'jamrek',
|
||||
'uploader_id': 'jamrek',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# https://www.youtube.com/watch?v=UBAABvegu2M
|
||||
'url': 'https://www.zapiks.com/live-fwt18-vallnord-arcalis-.html',
|
||||
'info_dict': {
|
||||
'id': 'UBAABvegu2M',
|
||||
'ext': 'mp4',
|
||||
'title': 'Replay Live - FWT18 Vallnord-Arcalís Andorra - Freeride World Tour 2018',
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'categories': ['Sports'],
|
||||
'channel': 'FIS Freeride World Tour by Peak Performance',
|
||||
'channel_follower_count': int,
|
||||
'channel_id': 'UCraJ3GNFfw6LXFuCV6McByg',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCraJ3GNFfw6LXFuCV6McByg',
|
||||
'comment_count': int,
|
||||
'description': 'md5:2d9fefef758d5ad0d5a987d46aff7572',
|
||||
'duration': 11328,
|
||||
'heatmap': 'count:100',
|
||||
'like_count': int,
|
||||
'live_status': 'was_live',
|
||||
'media_type': 'livestream',
|
||||
'playable_in_embed': True,
|
||||
'release_date': '20180306',
|
||||
'release_timestamp': 1520321809,
|
||||
'tags': 'count:27',
|
||||
'thumbnail': r're:https?://i\.ytimg\.com/.+',
|
||||
'timestamp': 1520336958,
|
||||
'upload_date': '20180306',
|
||||
'uploader': 'FIS Freeride World Tour by Peak Performance',
|
||||
'uploader_id': '@FISFreerideWorldTour',
|
||||
'uploader_url': 'https://www.youtube.com/@FISFreerideWorldTour',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
# https://vimeo.com/235746460
|
||||
'url': 'https://www.zapiks.fr/waking-dream-2017-full-movie.html',
|
||||
'info_dict': {
|
||||
'id': '235746460',
|
||||
'ext': 'mp4',
|
||||
'title': '"WAKING DREAM" (2017) Full Movie by Sam Favret & Julien Herry',
|
||||
'duration': 1649,
|
||||
'thumbnail': r're:https?://i\.vimeocdn\.com/video/.+',
|
||||
'uploader': 'Favret Sam',
|
||||
'uploader_id': 'samfavret',
|
||||
'uploader_url': 'https://vimeo.com/samfavret',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# https://www.zapiks.fr/ep3s5-bon-appetit-baqueira-m-1.html
|
||||
# https://www.zapiks.fr/index.php?action=playerIframe&media_id=118046
|
||||
'url': 'https://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
|
||||
'md5': '196fe42901639d868956b1dcaa48de15',
|
||||
'info_dict': {
|
||||
'id': '118046',
|
||||
'ext': 'mp4',
|
||||
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||
'description': 'md5:b45295c3897c4c01d7c04e8484c26aaf',
|
||||
'display_id': 'ep3s5-bon-appetit-baqueira-m-1',
|
||||
'duration': 642,
|
||||
'tags': 'count:8',
|
||||
'thumbnail': r're:https?://zpks\.com/.+',
|
||||
'timestamp': 1424370543,
|
||||
'upload_date': '20150219',
|
||||
'uploader': 'BonAppetit',
|
||||
'uploader_id': 'bonappetit',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
_UPLOADER_ID_RE = re.compile(r'/pro(?:fil)?/(?P<id>[^/?#]+)/?')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if embed_url := traverse_obj(webpage, (
|
||||
{find_element(cls='embed-container')}, {find_element(tag='iframe', html=True)},
|
||||
{extract_attributes}, 'src', {self._proto_relative_url}, {url_or_none},
|
||||
)):
|
||||
if not self.suitable(embed_url):
|
||||
return self.url_result(embed_url)
|
||||
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'data-media-id="(\d+)"', webpage, 'video id')
|
||||
|
||||
playlist = self._download_xml(
|
||||
f'http://www.zapiks.fr/view/index.php?action=playlist&media_id={video_id}&lang=en',
|
||||
display_id)
|
||||
|
||||
NS_MAP = {
|
||||
'jwplayer': 'http://rss.jwpcdn.com/',
|
||||
}
|
||||
|
||||
def ns(path):
|
||||
return xpath_with_ns(path, NS_MAP)
|
||||
|
||||
item = playlist.find('./channel/item')
|
||||
|
||||
title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
thumbnail = xpath_text(
|
||||
item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, 'duration', default=None))
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'uploadDate', webpage, 'upload date', default=None), ' ')
|
||||
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'UserPlays:(\d+)', webpage, 'view count', default=None))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'UserComments:(\d+)', webpage, 'comment count', default=None))
|
||||
video_responsive = traverse_obj(webpage, (
|
||||
{find_element(cls='video-responsive', html=True)}, {extract_attributes}, {dict}))
|
||||
data_media_url = traverse_obj(video_responsive, ('data-media-url', {url_or_none}))
|
||||
if data_media_url and urllib.parse.urlparse(url).path == '/index.php':
|
||||
return self.url_result(data_media_url, ZapiksIE)
|
||||
|
||||
data_playlist = traverse_obj(video_responsive, ('data-playlist', {json.loads}, ..., any))
|
||||
formats = []
|
||||
for source in item.findall(ns('./jwplayer:source')):
|
||||
format_id = source.attrib['label']
|
||||
f = {
|
||||
'url': source.attrib['file'],
|
||||
for source in traverse_obj(data_playlist, (
|
||||
'sources', lambda _, v: url_or_none(v['file']),
|
||||
)):
|
||||
format_id = traverse_obj(source, ('label', {str_or_none}))
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
}
|
||||
m = re.search(r'^(?P<height>\d+)[pP]', format_id)
|
||||
if m:
|
||||
f['height'] = int(m.group('height'))
|
||||
formats.append(f)
|
||||
'url': source['file'],
|
||||
**parse_resolution(format_id),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'display_id': display_id,
|
||||
'duration': parse_duration(self._html_search_meta('duration', webpage, default=None)),
|
||||
'formats': formats,
|
||||
'timestamp': unified_timestamp(self._html_search_meta('uploadDate', webpage, default=None)),
|
||||
**traverse_obj(webpage, {
|
||||
'description': ({find_element(cls='description-text')}, {clean_html}, filter),
|
||||
'tags': (
|
||||
{find_elements(cls='bs-label', html=True)},
|
||||
..., {extract_attributes}, 'title', {clean_html}, filter),
|
||||
'view_count': (
|
||||
{find_element(cls='video-content-view-counter')}, {clean_html},
|
||||
{lambda x: re.sub(r'(?:vues|views|\s+)', '', x)}, {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(webpage, ({find_element(cls='video-content-user-link', html=True)}, {
|
||||
'uploader': ({clean_html}, filter),
|
||||
'uploader_id': ({extract_attributes}, 'href', {self._UPLOADER_ID_RE.fullmatch}, 'id'),
|
||||
})),
|
||||
**traverse_obj(data_playlist, {
|
||||
'id': ('mediaid', {str_or_none}),
|
||||
'title': ('title', {clean_html}, filter),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user