from urllib.parse import parse_qs, urlparse

from ..common.common import mkthumbs, log_unknown_card, G

# TODO: temporary, will move to somewhere else in common


def prepare_infocards(metadata):
    """
    Extract and parse all infocards from a video's metadata dict.

    Looks up metadata['cards']['cardCollectionRenderer']['cards'] (every
    level is optional), runs each entry through parse_infocard() and drops
    the entries for which that returned a falsy value.
    """
    cards = metadata.get('cards', {}).get('cardCollectionRenderer', {}).get('cards', [])
    return [parsed for parsed in map(parse_infocard, cards) if parsed]


def prepare_endcards(metadata):
    """
    Extract and parse all endscreen cards from a video's metadata dict.

    Looks up metadata['endscreen']['endscreenRenderer']['elements'] (every
    level is optional), runs each entry through parse_endcard() and drops
    the entries for which that returned a falsy value.
    """
    endsc = metadata.get('endscreen', {}).get('endscreenRenderer', {}).get('elements', [])
    return [parsed for parsed in map(parse_endcard, endsc) if parsed]


def clean_url(url):
    """
    Return the first value of the `q` query parameter of `url`, or `url`
    itself when there is no (non-empty) `q` parameter.
    """
    # externals URLs are redirected through youtube.com/redirect, but we
    # may encounter internal URLs, too
    # NOTE(review): this unwraps *any* URL carrying a `q` parameter, not
    # only /redirect links -- confirm that is intended.
    return parse_qs(urlparse(url).query).get('q', [url])[0]


def toInt(s, fallback=0):
    """
    Concatenate all digit characters found in `s` and return them as int.

    Returns `fallback` when `s` is None or yields no parseable digits
    (e.g. "1,234 views" -> 1234, "No views" -> fallback).
    """
    if s is None:
        return fallback
    try:
        return int(''.join(filter(str.isdigit, s)))
    except ValueError:
        return fallback


def delL(s):
    """
    Remove the leftmost word from a string, i.e. everything up to and
    including the first space. Returns '' if `s` contains no space.
    """
    return s.partition(' ')[2]


def parse_infocard(card):
    """
    parses a single infocard into a format that's easier to handle.

    Returns a dict {'type': ..., 'content': {...}} where type is one of
    "POLL", "VIDEO", "PLAYLIST", "WEBSITE" or "CHANNEL". Returns None for
    cards without content, and for unrecognised cards (those are passed to
    log_unknown_card() first).
    """
    card = card['cardRenderer']
    if not card.get('content'):
        return None  # probably the "View corrections" card, ignore.

    # the content dict is keyed by the renderer type; only the first key
    # is considered.
    ctype = next(iter(card['content']))
    content = card['content'][ctype]

    if ctype == "pollRenderer":
        return {'type': "POLL", 'content': {
            'question': content['question']['simpleText'],
            'answers': [(a['text']['simpleText'], a['numVotes'])
                        for a in content['choices']],
        }}
    elif ctype == "videoInfoCardContentRenderer":
        is_live = content.get('badge', {}).get('liveBadgeRenderer') is not None
        return {'type': "VIDEO", 'content': {
            'video_id': content['action']['watchEndpoint']['videoId'],
            'title': content['videoTitle']['simpleText'],
            'author': delL(content['channelName']['simpleText']),
            'length': "LIVE" if is_live
                      else content.get('lengthString', {}).get('simpleText'),  # "23:03"
            'views': toInt(content.get('viewCountText', {}).get('simpleText')),
            # XXX: views sometimes "Starts: July 31, 2020 at 1:30 PM"
        }}
    elif ctype == "playlistInfoCardContentRenderer":
        return {'type': "PLAYLIST", 'content': {
            'playlist_id': content['action']['watchEndpoint']['playlistId'],
            'video_id': content['action']['watchEndpoint']['videoId'],
            'title': content['playlistTitle']['simpleText'],
            'author': delL(content['channelName']['simpleText']),
            'n_videos': toInt(content['playlistVideoCount']['simpleText']),
        }}
    elif ctype == "simpleCardContentRenderer" and \
            'urlEndpoint' in content.get('command', {}):
        # a simple card without a 'command' is treated like any other
        # unrecognised card (logged below) instead of raising KeyError.
        return {'type': "WEBSITE", 'content': {
            'url': clean_url(content['command']['urlEndpoint']['url']),
            'domain': content['displayDomain']['simpleText'],
            'title': content['title']['simpleText'],
            # XXX: no thumbnails for infocards
        }}
    elif ctype == "collaboratorInfoCardContentRenderer":
        return {'type': "CHANNEL", 'content': {
            'channel_id': content['endpoint']['browseEndpoint']['browseId'],
            'title': content['channelName']['simpleText'],
            'icons': mkthumbs(content['channelAvatar']['thumbnails']),
            'subscribers': content.get('subscriberCountText', {}).get('simpleText', ''),  # "545K subscribers"
        }}
    else:
        log_unknown_card(card)
        return None


def parse_endcard(card):
    """
    parses a single endcard into a format that's easier to handle.

    Returns a dict {'type': ..., 'content': {...}} where type is one of
    "CHANNEL", "VIDEO", "PLAYLIST" or "WEBSITE" ("CREATOR_MERCHANDISE"
    cards are reported as "WEBSITE"). Returns None for video cards without
    an endpoint, and for unrecognised cards (those are passed to
    log_unknown_card() first).
    """
    # NOTE(review): `value|G.text` / `value|G('key')` use the project's G
    # helper; presumably a safe getter / text extractor -- see its
    # definition in common.
    card = card.get('endscreenElementRenderer', card)  # only sometimes nested
    ctype = card['style']

    if ctype == "CHANNEL":
        return {'type': ctype, 'content': {
            'channel_id': card['endpoint']['browseEndpoint']['browseId'],
            'title': card['title']|G.text,
            'icons': mkthumbs(card['image']['thumbnails']),
        }}
    elif ctype == "VIDEO":
        if 'endpoint' not in card:
            return None  # title == "This video is unavailable."
        return {'type': ctype, 'content': {
            'video_id': card['endpoint']['watchEndpoint']['videoId'],
            'title': card['title']|G.text,
            'length': card|G('videoDuration')|G.text,  # '12:21'
            'views': toInt(card['metadata']|G.text),
            # XXX: no channel name
        }}
    elif ctype == "PLAYLIST":
        return {'type': ctype, 'content': {
            'playlist_id': card['endpoint']['watchEndpoint']['playlistId'],
            'video_id': card['endpoint']['watchEndpoint']['videoId'],
            'title': card['title']|G.text,
            'author': delL(card['metadata']|G.text),
            'n_videos': toInt(card['playlistLength']|G.text),
        }}
    elif ctype in ("WEBSITE", "CREATOR_MERCHANDISE"):
        url = clean_url(card['endpoint']['urlEndpoint']['url'])
        return {'type': "WEBSITE", 'content': {
            'url': url,
            'domain': urlparse(url).netloc,
            'title': card['title']|G.text,
            'icons': mkthumbs(card['image']['thumbnails']),
        }}
    else:
        log_unknown_card(card)
        return None