From a2751238169574a451c9d3354b3680f91e633c92 Mon Sep 17 00:00:00 2001 From: girst Date: Tue, 18 Aug 2020 22:05:43 +0200 Subject: [PATCH] fix learning playlists --- app/common/innertube.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/app/common/innertube.py b/app/common/innertube.py index f2c97d6..3320d55 100644 --- a/app/common/innertube.py +++ b/app/common/innertube.py @@ -1,6 +1,7 @@ # functions that deal with parsing data from youtube's internal API ("innertube") from urllib.parse import parse_qs, urlparse +import re def findall(obj, key): """ @@ -133,18 +134,22 @@ def parse_result_items(items): ['navigationEndpoint']['browseEndpoint']['browseId'], 'length': content.get('lengthText',{}).get('simpleText') \ if not is_live else 'LIVE', # "44:07", "1:41:50" - 'views': toInt(content.get('viewCountText',{}).get('simpleText') or # "123,456 views" - listget(content.get('viewCountText',{}).get('runs'),0,{}).get('text')), # "1,234 watching" + 'views': toInt(content.get('viewCountText',{}).get('simpleText') or # "123,456 views", ... + listget(content.get('viewCountText',{}).get('runs',[]),0,{}).get('text')) or 0, # ... "1,234 watching", absent on 0 views 'published': age(content.get('publishedTimeText',{}).get('simpleText')), }}) elif key == 'playlistRenderer': results.append({'type': 'PLAYLIST', 'content': { - 'playlist_id': content['navigationEndpoint']['watchEndpoint']['playlistId'], - 'video_id': content['navigationEndpoint']['watchEndpoint']['videoId'], + 'playlist_id': content['navigationEndpoint'].get('watchEndpoint',{}).get('playlistId') or \ + content.get('playlistId'), # COURSE/"learning playlist" + 'video_id': content['navigationEndpoint'].get('watchEndpoint',{}).get('videoId') or \ + videoid_from_thumbnail(content), # learning playlist 'title': content['title']['simpleText'], - 'author': content['longBylineText']['runs'][0]['text'] or - content['shortBylineText']['runs'][0]['text'], - 'channel_id': content['longBylineText']['runs'][0]['navigationEndpoint']['browseEndpoint']['browseId'], # OR .shortBylineText + # Note: learning playlists have no author/channel_id + 'author': listget(content.get('longBylineText',{}).get('runs',[]),0,{}).get('text') or + listget(content.get('shortBylineText',{}).get('runs',[]),0,{}).get('text'), + 'channel_id': listget(content.get('longBylineText',{}).get('runs',[]),0,{}) \ + .get('navigationEndpoint',{}).get('browseEndpoint',{}).get('browseId'), # OR .shortBylineText 'n_videos': toInt(content['videoCount']), }}) elif key == 'radioRenderer': # "Mix" playlists @@ -167,7 +172,8 @@ def parse_result_items(items): 'subscribers': content.get('subscriberCountText',{}).get('simpleText'), # "2.47K subscribers" }}) elif key == 'shelfRenderer': - r, e = parse_result_items(content['content']['verticalListRenderer']['items']) + subkey = next(iter(content['content'].keys()), {}) #verticalListRenderer/horizontalMovieListRenderer + r, e = parse_result_items(content['content'][subkey]['items']) results.extend(r) extras.extend(e) elif key == 'movieRenderer': # movies to buy/rent @@ -287,6 +293,12 @@ def parse_endcard(card): log_unknown_card(card) return None +def videoid_from_thumbnail(content): + # learning playlist; example: PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5 (/user/enyay/playlists) + return re.match(r"https?://i.ytimg.com/vi/([-_0-9a-zA-Z]{11})|()", + listget(listget(content.get('thumbnails',[]),0,{}).get('thumbnails',[]),0,{}).get('url','') + ).group(1) + def parse_channel_items(items, channel_id, author): result = [] extra = [] @@ -311,7 +323,7 @@ def parse_channel_items(items, channel_id, author): elif key == "gridPlaylistRenderer" or key == "playlistRenderer": result.append({'type': 'PLAYLIST', 'content': { 'playlist_id': content['navigationEndpoint'].get('watchEndpoint',{}).get('playlistId') or content.get('playlistId'), - 'video_id': content['navigationEndpoint'].get('watchEndpoint',{}).get('videoId',{}), + 'video_id': content['navigationEndpoint'].get('watchEndpoint',{}).get('videoId',{}) or videoid_from_thumbnail(content), 'title': (content['title'].get('simpleText') or # playlistRenderer content['title']['runs'][0]['text']), # gridPlaylistRenderer 'author': author, -- 2.39.3