]> git.gir.st - subscriptionfeed.git/blob - app/youtube/cards.py
implement /feeds/videos.xml endpoint
[subscriptionfeed.git] / app / youtube / cards.py
1 from urllib.parse import parse_qs, urlparse
2
3 from ..common.common import mkthumbs, log_unknown_card, G # TODO: temporary, will move to somewhere else in common
4
def prepare_infocards(metadata):
    """
    Collect the infocards found in `metadata` and parse each of them.

    The raw cards are read from metadata['cards']['cardCollectionRenderer']
    ['cards']; any missing level is treated as "no cards". Every raw card is
    run through parse_infocard, and falsy results (e.g. None for ignored or
    unknown cards) are dropped from the returned list.
    """
    collection = metadata.get('cards', {}).get('cardCollectionRenderer', {})
    raw_cards = collection.get('cards', [])
    parsed = (parse_infocard(raw) for raw in raw_cards)
    return [card for card in parsed if card]
8
def prepare_endcards(metadata):
    """
    Collect the endscreen elements found in `metadata` and parse each of them.

    The raw elements are read from metadata['endscreen']['endscreenRenderer']
    ['elements']; any missing level is treated as "no elements". Every raw
    element is run through parse_endcard, and falsy results (e.g. None for
    unavailable or unknown cards) are dropped from the returned list.
    """
    renderer = metadata.get('endscreen', {}).get('endscreenRenderer', {})
    raw_elements = renderer.get('elements', [])
    parsed = (parse_endcard(raw) for raw in raw_elements)
    return [card for card in parsed if card]
12
def clean_url(url):
    """
    Return the target of a redirect-style URL, or `url` itself.

    External URLs are redirected through youtube.com/redirect and carry the
    real destination in their `q` query parameter; internal URLs may show up
    here as well. If the query string has a `q` parameter, its first value is
    returned, otherwise the URL is handed back unchanged.
    """
    query_string = urlparse(url).query
    targets = parse_qs(query_string).get('q')
    if targets is None:
        # no `q` parameter: not a redirect link, keep the URL as it is
        return url
    return targets[0]
17
def toInt(s, fallback=0):
    """
    Build an integer from all digit characters in `s`.

    Every non-digit character (thousands separators, words, ...) is dropped
    and the remaining digits are concatenated, so "1,234 views" gives 1234.
    `fallback` is returned when `s` is None or when the remaining characters
    cannot be converted by int() (for example, when there are none).
    """
    if s is not None:
        try:
            return int(''.join(ch for ch in s if str.isdigit(ch)))
        except ValueError:
            # nothing convertible left in the string
            pass
    return fallback
25
def delL(s):
    """
    Return `s` with its leftmost word removed.

    Everything up to and including the first space is cut off. A string
    without any space is treated as a single word, so the result is the
    empty string.
    """
    return s.partition(' ')[2]
28
def parse_infocard(card):
    """
    turns a single raw infocard into a flat {'type': ..., 'content': {...}}
    dict that's easier to handle.

    'type' is one of "POLL", "VIDEO", "PLAYLIST", "WEBSITE" or "CHANNEL".
    Returns None for cards without a 'content' entry and for renderer types
    not handled here; the latter are passed to log_unknown_card first.
    """
    card = card['cardRenderer']
    if 'content' not in card:
        return None  # probably the "View corrections" card, ignore.

    # the renderer name is the first key of the content dict
    ctype, content = list(card['content'].items())[0]

    if ctype == "pollRenderer":
        question = content['question']['simpleText']
        answers = [
            (choice['text']['simpleText'], choice['numVotes'])
            for choice in content['choices']
        ]
        return {'type': "POLL", 'content': {
            'question': question,
            'answers': answers,
        }}

    if ctype == "videoInfoCardContentRenderer":
        is_live = content.get('badge', {}).get('liveBadgeRenderer') is not None
        info = {
            'video_id': content['action']['watchEndpoint']['videoId'],
            'title': content['videoTitle']['simpleText'],
            'author': delL(content['channelName']['simpleText']),
        }
        if is_live:
            info['length'] = "LIVE"
        else:
            info['length'] = content.get('lengthString', {}).get('simpleText')  # "23:03"
        # XXX: views sometimes "Starts: July 31, 2020 at 1:30 PM"
        info['views'] = toInt(content.get('viewCountText', {}).get('simpleText'))
        return {'type': "VIDEO", 'content': info}

    if ctype == "playlistInfoCardContentRenderer":
        return {'type': "PLAYLIST", 'content': {
            'playlist_id': content['action']['watchEndpoint']['playlistId'],
            'video_id': content['action']['watchEndpoint']['videoId'],
            'title': content['playlistTitle']['simpleText'],
            'author': delL(content['channelName']['simpleText']),
            'n_videos': toInt(content['playlistVideoCount']['simpleText']),
        }}

    if ctype == "simpleCardContentRenderer" and 'urlEndpoint' in content['command']:
        target = clean_url(content['command']['urlEndpoint']['url'])
        return {'type': "WEBSITE", 'content': {
            'url': target,
            'domain': content['displayDomain']['simpleText'],
            'title': content['title']['simpleText'],
            # XXX: no thumbnails for infocards
        }}

    if ctype == "collaboratorInfoCardContentRenderer":
        return {'type': "CHANNEL", 'content': {
            'channel_id': content['endpoint']['browseEndpoint']['browseId'],
            'title': content['channelName']['simpleText'],
            'icons': mkthumbs(content['channelAvatar']['thumbnails']),
            # e.g. "545K subscribers"; empty string when not present
            'subscribers': content.get('subscriberCountText', {}).get('simpleText', ''),
        }}

    # anything else is reported and skipped
    log_unknown_card(card)
    return None
81
def parse_endcard(card):
    """
    turns a single raw endcard into a flat {'type': ..., 'content': {...}}
    dict that's easier to handle.

    'type' is "CHANNEL", "VIDEO", "PLAYLIST" or "WEBSITE" (the latter is also
    used for "CREATOR_MERCHANDISE" cards). Returns None for video cards
    without an 'endpoint' and for styles not handled here; the latter are
    passed to log_unknown_card first.

    NOTE(review): text fields are read with the imported `G` helper
    (`x|G.text`); presumably it extracts the displayed text - confirm against
    its definition in common.
    """
    card = card.get('endscreenElementRenderer', card)  # only sometimes nested
    ctype = card['style']

    if ctype == "CHANNEL":
        return {'type': ctype, 'content': {
            'channel_id': card['endpoint']['browseEndpoint']['browseId'],
            'title': card['title']|G.text,
            'icons': mkthumbs(card['image']['thumbnails']),
        }}

    if ctype == "VIDEO":
        if 'endpoint' not in card:
            return None  # title == "This video is unavailable."
        return {'type': ctype, 'content': {
            'video_id': card['endpoint']['watchEndpoint']['videoId'],
            'title': card['title']|G.text,
            'length': card|G('videoDuration')|G.text,  # '12:21'
            'views': toInt(card['metadata']|G.text),
            # XXX: no channel name
        }}

    if ctype == "PLAYLIST":
        return {'type': ctype, 'content': {
            'playlist_id': card['endpoint']['watchEndpoint']['playlistId'],
            'video_id': card['endpoint']['watchEndpoint']['videoId'],
            'title': card['title']|G.text,
            'author': delL(card['metadata']|G.text),
            'n_videos': toInt(card['playlistLength']|G.text),
        }}

    if ctype in ("WEBSITE", "CREATOR_MERCHANDISE"):
        # both styles are reported as plain website cards
        target = clean_url(card['endpoint']['urlEndpoint']['url'])
        return {'type': "WEBSITE", 'content': {
            'url': target,
            'domain': urlparse(target).netloc,
            'title': card['title']|G.text,
            'icons': mkthumbs(card['image']['thumbnails']),
        }}

    # anything else is reported and skipped
    log_unknown_card(card)
    return None
Imprint / Impressum