[ie/patreon] Fix extractors (#16112)

Closes #15218, Closes #16111
Authored by: bashonly
[ie/zapiks] Improve extraction (#16030)
2026-03-03 12:49:37 +00:00 · 2026-03-02 22:55:43 +00:00 · 2026-02-26 16:26:17 +00:00 · 2026-02-22 23:12:53 +00:00 · 2026-02-22 21:53:36 +00:00
6 changed files with 306 additions and 156 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -672,10 +672,6 @@ from .frontendmasters import (
    FrontendMastersIE,
    FrontendMastersLessonIE,
 )
-from .frontro import (
-    TheChosenGroupIE,
-    TheChosenIE,
-)
 from .fujitv import FujiTVFODPlus7IE
 from .funk import FunkIE
 from .funker530 import Funker530IE
@@ -2063,6 +2059,10 @@ from .tenplay import (
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .tfo import TFOIE
+from .thechosen import (
+    TheChosenGroupIE,
+    TheChosenIE,
+)
 from .theguardian import (
    TheGuardianPodcastIE,
    TheGuardianPodcastPlaylistIE,
--- a/yt_dlp/extractor/aenetworks.py
+++ b/yt_dlp/extractor/aenetworks.py
@@ -91,8 +91,8 @@ class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
        if filter_key == 'canonical':
            webpage = self._download_webpage(url, filter_value)
            graphql_video_id = self._search_regex(
-                r'<meta\b[^>]+\bcontent="[^"]*\btpid/(\d+)"', webpage,
-                'id') or self._html_search_meta('videoId', webpage, 'GraphQL video ID', fatal=True)
+                r'<meta\b[^>]+\bcontent="[^"]*\btpid/(\d+)"', webpage, 'id',
+                default=None) or self._html_search_meta('videoId', webpage, 'GraphQL video ID', fatal=True)
        else:
            graphql_video_id = filter_value

--- a/yt_dlp/extractor/frontro.py
+++ b/yt_dlp/extractor/frontro.py
@@ -99,66 +99,3 @@ class FrontroGroupBaseIE(FrontoBaseIE):
                'modified_timestamp': ('updatedAt', {parse_iso8601}),
            }),
        }
-
-
-class TheChosenIE(FrontroVideoBaseIE):
-    _CHANNEL_ID = '12884901895'
-
-    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/watch/(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'https://watch.thechosen.tv/watch/184683594325',
-        'md5': '3f878b689588c71b38ec9943c54ff5b0',
-        'info_dict': {
-            'id': '184683594325',
-            'ext': 'mp4',
-            'title': 'Season 3 Episode 2: Two by Two',
-            'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
-            'comment_count': int,
-            'view_count': int,
-            'like_count': int,
-            'duration': 4212,
-            'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683594325/',
-            'timestamp': 1698954546,
-            'upload_date': '20231102',
-            'modified_timestamp': int,
-            'modified_date': str,
-        },
-    }, {
-        'url': 'https://watch.thechosen.tv/watch/184683596189',
-        'md5': 'd581562f9d29ce82f5b7770415334151',
-        'info_dict': {
-            'id': '184683596189',
-            'ext': 'mp4',
-            'title': 'Season 4 Episode 8: Humble',
-            'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
-            'comment_count': int,
-            'view_count': int,
-            'like_count': int,
-            'duration': 5092,
-            'thumbnail': r're:https://fastly\.frontrowcdn\.com/channels/12884901895/VIDEO_THUMBNAIL/184683596189/',
-            'timestamp': 1715019474,
-            'upload_date': '20240506',
-            'modified_timestamp': int,
-            'modified_date': str,
-        },
-    }]
-
-
-class TheChosenGroupIE(FrontroGroupBaseIE):
-    _CHANNEL_ID = '12884901895'
-    _VIDEO_EXTRACTOR = TheChosenIE
-    _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/watch/%s'
-
-    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'https://watch.thechosen.tv/group/309237658592',
-        'info_dict': {
-            'id': '309237658592',
-            'title': 'Season 3',
-            'timestamp': 1746203969,
-            'upload_date': '20250502',
-            'modified_timestamp': int,
-            'modified_date': str,
-        },
-        'playlist_count': 8,
-    }]
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -25,6 +25,7 @@ from ..utils.traversal import (
    find_elements,
    require,
    traverse_obj,
+    trim_str,
    value,
 )

@@ -32,16 +33,15 @@ from ..utils.traversal import (
 class PatreonBaseIE(InfoExtractor):
    @functools.cached_property
    def patreon_user_agent(self):
-        # Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection.
-        # Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in
+        # Patreon mobile UA yields higher res m3u8 for locked posts, but gives 401 if not logged-in
        if self._get_cookies('https://www.patreon.com/').get('session_id'):
-            return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)'
-        return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
+            return 'Patreon/126.9.0.15 (Android; Android 14; Scale/2.10)'
+        return None

    def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
        if headers is None:
            headers = {}
-        if 'User-Agent' not in headers:
+        if 'User-Agent' not in headers and self.patreon_user_agent:
            headers['User-Agent'] = self.patreon_user_agent
        if query:
            query.update({'json-api-version': 1.0})
@@ -50,7 +50,9 @@ class PatreonBaseIE(InfoExtractor):
            return self._download_json(
                f'https://www.patreon.com/api/{ep}',
                item_id, note=note if note else 'Downloading API JSON',
-                query=query, fatal=fatal, headers=headers)
+                query=query, fatal=fatal, headers=headers,
+                # If not using Patreon mobile UA, we need impersonation due to Cloudflare
+                impersonate=not self.patreon_user_agent)
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.response.headers.get('Content-Type')) != 'json':
                raise
@@ -623,14 +625,13 @@ class PatreonCampaignIE(PatreonBaseIE):
        'info_dict': {
            'id': '9631148',
            'title': 'Anything Else?',
-            'description': 'md5:2ee1db4aed2f9460c2b295825a24aa08',
+            'description': 'md5:b2f20eec4cb5520d9a4be4971f28add5',
            'uploader': 'dan ',
            'uploader_id': '13852412',
            'uploader_url': 'https://www.patreon.com/anythingelse',
            'channel': 'Anything Else?',
            'channel_id': '9631148',
            'channel_url': 'https://www.patreon.com/anythingelse',
-            'channel_follower_count': int,
            'age_limit': 0,
            'thumbnail': r're:https?://.+/.+',
        },
@@ -675,16 +676,15 @@ class PatreonCampaignIE(PatreonBaseIE):
                break

    def _real_extract(self, url):
-
        campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
        if campaign_id is None:
-            webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
-            campaign_id = traverse_obj(self._search_nextjs_data(webpage, vanity, default=None), (
-                'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
-            if not campaign_id:
-                campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
-                    ((..., 'value', 'campaign', 'data'), lambda _, v: v['type'] == 'campaign'),
-                    'id', {str}, any, {require('campaign ID')}))
+            results = self._call_api('search', vanity, query={
+                'q': vanity,
+                'page[size]': '5',
+            })['data']
+            campaign_id = traverse_obj(results, (
+                lambda _, v: v['type'] == 'campaign-document' and v['attributes']['url'].lower().endswith(f'/{vanity.lower()}'),
+                'id', {trim_str(start='campaign_')}, filter, any, {require('campaign ID')}))

        params = {
            'json-api-use-default-includes': 'false',
--- a/yt_dlp/extractor/thechosen.py
+++ b/yt_dlp/extractor/thechosen.py
@@ -0,0 +1,118 @@
+from .common import InfoExtractor
+from .frontro import FrontroGroupBaseIE
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class TheChosenIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/(?:video|watch)/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://watch.thechosen.tv/video/184683594325',
+        'md5': '3f878b689588c71b38ec9943c54ff5b0',
+        'info_dict': {
+            'id': '184683594325',
+            'ext': 'mp4',
+            'title': 'Season 3 Episode 2: Two by Two',
+            'description': 'md5:174c373756ecc8df46b403f4fcfbaf8c',
+            'duration': 4212,
+            'thumbnail': 'https://cas.global.ssl.fastly.net/hls-10-4/184683594325/thumbnail.png',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://watch.thechosen.tv/video/184683596189',
+        'md5': 'd581562f9d29ce82f5b7770415334151',
+        'info_dict': {
+            'id': '184683596189',
+            'ext': 'mp4',
+            'title': 'Season 4 Episode 8: Humble',
+            'description': 'md5:20a57bead43da1cf77cd5b0fe29bbc76',
+            'duration': 5092,
+            'thumbnail': 'https://cdn.thechosen.media/videos/cmkvu7nn500nhfm0wpgmm6180/thumbnail.jpg',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://watch.thechosen.tv/video/184683621748',
+        'info_dict': {
+            'id': '184683621748',
+            'ext': 'mp4',
+            'title': 'Season 5 Episode 2: House of Cards',
+            'description': 'md5:55b389cbb4b7a01d8c2d837102905617',
+            'duration': 3086,
+            'thumbnail': 'https://cdn.thechosen.media/videos/cmkolt4el000afd5zd6x0aeph/thumbnail.jpg',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://watch.thechosen.tv/video/184683621750',
+        'info_dict': {
+            'id': '184683621750',
+            'ext': 'mp4',
+            'title': 'Season 5 Episode 3:  Woes',
+            'description': 'md5:90ca3cc41316a965fd1cd3d5b3458784',
+            'duration': 3519,
+            'thumbnail': 'https://cdn.thechosen.media/videos/cmkoltsl8000dfd5z3luid3mg/thumbnail.jpg',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        metadata = self._download_json(f'https://api.watch.thechosen.tv/v1/videos/{video_id}', video_id)
+
+        formats, subtitles = [], {}
+        for fmt_url in traverse_obj(metadata, ('details', 'video', ..., 'url', {url_or_none})):
+            ext = determine_ext(fmt_url)
+            if ext == 'm3u8':
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(fmt_url, video_id, 'mp4', fatal=False)
+            elif ext == 'mpd':
+                fmts, subs = self._extract_mpd_formats_and_subtitles(fmt_url, video_id, fatal=False)
+            else:
+                self.report_warning(f'Skipping unsupported format extension "{ext}"', video_id=video_id)
+                continue
+            formats.extend(fmts)
+            self._merge_subtitles(subs, target=subtitles)
+
+        thumbnails = []
+        for thumb_id, thumb_url in traverse_obj(metadata, (
+            ('thumbs', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]),
+        )):
+            thumbnails.append({
+                'id': thumb_id,
+                'url': thumb_url,
+            })
+
+        return {
+            'id': video_id,
+            **traverse_obj(metadata, ({
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'duration': ('duration', {int_or_none}),
+            })),
+            'thumbnails': thumbnails,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+
+class TheChosenGroupIE(FrontroGroupBaseIE):
+    _WORKING = False
+    _CHANNEL_ID = '12884901895'
+    _VIDEO_EXTRACTOR = TheChosenIE
+    _VIDEO_URL_TMPL = 'https://watch.thechosen.tv/watch/%s'
+
+    _VALID_URL = r'https?://(?:www\.)?watch\.thechosen\.tv/group/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://watch.thechosen.tv/group/309237658592',
+        'info_dict': {
+            'id': '309237658592',
+            'title': 'Season 3',
+            'timestamp': 1746203969,
+            'upload_date': '20250502',
+            'modified_timestamp': int,
+            'modified_date': str,
+        },
+        'playlist_count': 8,
+    }]
--- a/yt_dlp/extractor/zapiks.py
+++ b/yt_dlp/extractor/zapiks.py
@@ -1,110 +1,205 @@
+import json
 import re
+import urllib.parse

 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
+    extract_attributes,
    int_or_none,
    parse_duration,
-    parse_iso8601,
-    xpath_text,
-    xpath_with_ns,
+    parse_resolution,
+    str_or_none,
+    unified_timestamp,
+    url_or_none,
+)
+from ..utils.traversal import (
+    find_element,
+    find_elements,
+    traverse_obj,
 )


 class ZapiksIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
-    _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"']
+    _VALID_URL = [
+        r'https?://(?:www\.)?zapiks\.(?:com|fr)/(?P<id>[\w-]+)\.html',
+        r'https?://(?:www\.)?zapiks\.fr/index\.php\?(?:[^#]+&)?media_id=(?P<id>\d+)',
+    ]
+    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?zapiks\.fr/index\.php\?(?:[^#"\']+&(?:amp;)?)?media_id=\d+)']
    _TESTS = [{
-        'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
+        'url': 'https://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
        'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
        'info_dict': {
            'id': '80798',
            'ext': 'mp4',
            'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
-            'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
-            'thumbnail': r're:https?://zpks\.com/.+\.jpg',
+            'description': 'md5:db07a553c1550e2905bceafa923000fd',
+            'display_id': 'ep2s3-bon-appetit-eh-be-viva',
            'duration': 528,
+            'tags': 'count:5',
+            'thumbnail': r're:https?://zpks\.com/.+',
            'timestamp': 1359044972,
            'upload_date': '20130124',
+            'uploader': 'BonAppetit',
+            'uploader_id': 'bonappetit',
            'view_count': int,
        },
    }, {
-        'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.zapiks.fr/index.php?action=playerIframe&amp;media_id=118046&amp;width=640&amp;height=360&amp;autoStart=false&amp;language=fr',
-        'only_matching': True,
-    }]
-    _WEBPAGE_TESTS = [{
-        'url': 'https://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+        'url': 'https://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
+        'md5': '196fe42901639d868956b1dcaa48de15',
        'info_dict': {
            'id': '118046',
            'ext': 'mp4',
            'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
-            'thumbnail': r're:https?://zpks\.com/.+\.jpg',
+            'display_id': 'ep3s5-bon-appetit-baqueira-m-1',
+            'duration': 642,
+            'tags': 'count:8',
+            'thumbnail': r're:https?://zpks\.com/.+',
+            'timestamp': 1424370543,
+            'upload_date': '20150219',
+            'uploader': 'BonAppetit',
+            'uploader_id': 'bonappetit',
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://www.zapiks.fr/index.php?action=playerIframe&media_id=164049',
+        'md5': 'fb81a7c9b7b84c00ba111028aee593b8',
+        'info_dict': {
+            'id': '164049',
+            'ext': 'mp4',
+            'title': 'Courchevel Hiver 2025/2026',
+            'display_id': 'courchevel-hiver-2025-2026',
+            'duration': 38,
+            'tags': 'count:1',
+            'thumbnail': r're:https?://zpks\.com/.+',
+            'timestamp': 1769019147,
+            'upload_date': '20260121',
+            'uploader': 'jamrek',
+            'uploader_id': 'jamrek',
+            'view_count': int,
+        },
+    }, {
+        # https://www.youtube.com/watch?v=UBAABvegu2M
+        'url': 'https://www.zapiks.com/live-fwt18-vallnord-arcalis-.html',
+        'info_dict': {
+            'id': 'UBAABvegu2M',
+            'ext': 'mp4',
+            'title': 'Replay Live - FWT18 Vallnord-Arcalís Andorra - Freeride World Tour 2018',
+            'age_limit': 0,
+            'availability': 'public',
+            'categories': ['Sports'],
+            'channel': 'FIS Freeride World Tour by Peak Performance',
+            'channel_follower_count': int,
+            'channel_id': 'UCraJ3GNFfw6LXFuCV6McByg',
+            'channel_url': 'https://www.youtube.com/channel/UCraJ3GNFfw6LXFuCV6McByg',
+            'comment_count': int,
+            'description': 'md5:2d9fefef758d5ad0d5a987d46aff7572',
+            'duration': 11328,
+            'heatmap': 'count:100',
+            'like_count': int,
+            'live_status': 'was_live',
+            'media_type': 'livestream',
+            'playable_in_embed': True,
+            'release_date': '20180306',
+            'release_timestamp': 1520321809,
+            'tags': 'count:27',
+            'thumbnail': r're:https?://i\.ytimg\.com/.+',
+            'timestamp': 1520336958,
+            'upload_date': '20180306',
+            'uploader': 'FIS Freeride World Tour by Peak Performance',
+            'uploader_id': '@FISFreerideWorldTour',
+            'uploader_url': 'https://www.youtube.com/@FISFreerideWorldTour',
+            'view_count': int,
+        },
+        'add_ie': ['Youtube'],
+    }, {
+        # https://vimeo.com/235746460
+        'url': 'https://www.zapiks.fr/waking-dream-2017-full-movie.html',
+        'info_dict': {
+            'id': '235746460',
+            'ext': 'mp4',
+            'title': '"WAKING DREAM" (2017) Full Movie by Sam Favret & Julien Herry',
+            'duration': 1649,
+            'thumbnail': r're:https?://i\.vimeocdn\.com/video/.+',
+            'uploader': 'Favret Sam',
+            'uploader_id': 'samfavret',
+            'uploader_url': 'https://vimeo.com/samfavret',
+        },
+        'add_ie': ['Vimeo'],
+        'expected_warnings': ['Failed to parse XML: not well-formed'],
+    }]
+    _WEBPAGE_TESTS = [{
+        # https://www.zapiks.fr/ep3s5-bon-appetit-baqueira-m-1.html
+        # https://www.zapiks.fr/index.php?action=playerIframe&media_id=118046
+        'url': 'https://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+        'md5': '196fe42901639d868956b1dcaa48de15',
+        'info_dict': {
+            'id': '118046',
+            'ext': 'mp4',
+            'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
+            'description': 'md5:b45295c3897c4c01d7c04e8484c26aaf',
+            'display_id': 'ep3s5-bon-appetit-baqueira-m-1',
+            'duration': 642,
+            'tags': 'count:8',
+            'thumbnail': r're:https?://zpks\.com/.+',
+            'timestamp': 1424370543,
+            'upload_date': '20150219',
+            'uploader': 'BonAppetit',
+            'uploader_id': 'bonappetit',
+            'view_count': int,
        },
    }]
+    _UPLOADER_ID_RE = re.compile(r'/pro(?:fil)?/(?P<id>[^/?#]+)/?')

    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id') or video_id
-
+        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
+        if embed_url := traverse_obj(webpage, (
+            {find_element(cls='embed-container')}, {find_element(tag='iframe', html=True)},
+            {extract_attributes}, 'src', {self._proto_relative_url}, {url_or_none},
+        )):
+            if not self.suitable(embed_url):
+                return self.url_result(embed_url)

-        if not video_id:
-            video_id = self._search_regex(
-                r'data-media-id="(\d+)"', webpage, 'video id')
-
-        playlist = self._download_xml(
-            f'http://www.zapiks.fr/view/index.php?action=playlist&media_id={video_id}&lang=en',
-            display_id)
-
-        NS_MAP = {
-            'jwplayer': 'http://rss.jwpcdn.com/',
-        }
-
-        def ns(path):
-            return xpath_with_ns(path, NS_MAP)
-
-        item = playlist.find('./channel/item')
-
-        title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
-        description = self._og_search_description(webpage, default=None)
-        thumbnail = xpath_text(
-            item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
-        duration = parse_duration(self._html_search_meta(
-            'duration', webpage, 'duration', default=None))
-        timestamp = parse_iso8601(self._html_search_meta(
-            'uploadDate', webpage, 'upload date', default=None), ' ')
-
-        view_count = int_or_none(self._search_regex(
-            r'UserPlays:(\d+)', webpage, 'view count', default=None))
-        comment_count = int_or_none(self._search_regex(
-            r'UserComments:(\d+)', webpage, 'comment count', default=None))
+        video_responsive = traverse_obj(webpage, (
+            {find_element(cls='video-responsive', html=True)}, {extract_attributes}, {dict}))
+        data_media_url = traverse_obj(video_responsive, ('data-media-url', {url_or_none}))
+        if data_media_url and urllib.parse.urlparse(url).path == '/index.php':
+            return self.url_result(data_media_url, ZapiksIE)

+        data_playlist = traverse_obj(video_responsive, ('data-playlist', {json.loads}, ..., any))
        formats = []
-        for source in item.findall(ns('./jwplayer:source')):
-            format_id = source.attrib['label']
-            f = {
-                'url': source.attrib['file'],
+        for source in traverse_obj(data_playlist, (
+            'sources', lambda _, v: url_or_none(v['file']),
+        )):
+            format_id = traverse_obj(source, ('label', {str_or_none}))
+            formats.append({
                'format_id': format_id,
-            }
-            m = re.search(r'^(?P<height>\d+)[pP]', format_id)
-            if m:
-                f['height'] = int(m.group('height'))
-            formats.append(f)
+                'url': source['file'],
+                **parse_resolution(format_id),
+            })

        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
-            'view_count': view_count,
-            'comment_count': comment_count,
+            'display_id': display_id,
+            'duration': parse_duration(self._html_search_meta('duration', webpage, default=None)),
            'formats': formats,
+            'timestamp': unified_timestamp(self._html_search_meta('uploadDate', webpage, default=None)),
+            **traverse_obj(webpage, {
+                'description': ({find_element(cls='description-text')}, {clean_html}, filter),
+                'tags': (
+                    {find_elements(cls='bs-label', html=True)},
+                    ..., {extract_attributes}, 'title', {clean_html}, filter),
+                'view_count': (
+                    {find_element(cls='video-content-view-counter')}, {clean_html},
+                    {lambda x: re.sub(r'(?:vues|views|\s+)', '', x)}, {int_or_none}),
+            }),
+            **traverse_obj(webpage, ({find_element(cls='video-content-user-link', html=True)}, {
+                'uploader': ({clean_html}, filter),
+                'uploader_id': ({extract_attributes}, 'href', {self._UPLOADER_ID_RE.fullmatch}, 'id'),
+            })),
+            **traverse_obj(data_playlist, {
+                'id': ('mediaid', {str_or_none}),
+                'title': ('title', {clean_html}, filter),
+                'thumbnail': ('image', {url_or_none}),
+            }),
        }
Author	SHA1	Message	Date
bashonly	bf4dfffe01	[ie/patreon] Fix extractors (#16112) Closes #15218, Closes #16111 Authored by: bashonly	2026-03-02 22:55:43 +00:00
doe1080	6f796a2bff	[ie/zapiks] Improve extraction (#16030) Authored by: doe1080	2026-02-26 16:26:17 +00:00
0x∅	e3118604aa	[ie/thechosen] Rework extractor (#16021) Closes #16008 Authored by: 0xvd	2026-02-22 23:12:53 +00:00
bashonly	338dbebdb8	[ie/aenetworks] Fix extraction (#16036) Fix `2485653859` Authored by: bashonly	2026-02-22 21:53:36 +00:00