git.gir.st - subscriptionfeed.git/blob - app/browse/innertube.py
browse/search: support gridVideoRenderer
[subscriptionfeed.git] / app / browse / innertube.py
1 # functions that deal with parsing data from youtube's internal API ("innertube")
2
3 from ..common.common import mkthumbs, log_unknown_card, G
4
class Select:
    """Pipe-style filter that picks the values stored under one key.

    `items|Select('foo')` returns the first foo in the list,
    `items|Select(all='foo')` returns all foos (an empty list when nothing
    matches or when `items` cannot be searched at all).
    """
    def __init__(self, key=None, *, all=None):
        # `all` serves both as the key to look for and as the flag that
        # switches from "first match" to "every match".
        self.key = key or all
        self.all = all

    def __ror__(self, other):
        # Called for `other|Select(...)`: collect other[i][key] for every
        # element that has `key`.
        try:
            items = [entry[self.key] for entry in other if self.key in entry.keys()]
        except Exception:
            # Deliberately best effort: `other` may be None or hold elements
            # without .keys(). Unlike a bare `except:` this does not swallow
            # KeyboardInterrupt/SystemExit.
            items = []
        # The first match is extracted through the imported G helper
        # (presumably a forgiving index lookup; see ..common.common).
        return items if self.all else items|G(0)
class A:
    """Pipe-style "apply": `value|A(f, *args)` evaluates `f(value, *args)`.

    `value|A.int` uses a ready-made instance of the nested `_Int` helper,
    which joins all digits found in `value` and converts them to an int
    (None when that is not possible).
    """
    def __init__(self, f, *args):
        self.f = f
        self.args = args

    def __ror__(self, other):
        # `other` is the left-hand operand of the pipe.
        return self.f(other, *self.args)

    class _Int:
        """Digit extractor: `"1,234 views"|A.int` gives 1234."""
        def __ror__(self, other):
            # `int` below is the builtin: names bound in the class body of A
            # are not visible from inside method bodies.
            try:
                return int(''.join(filter(str.isdigit, other)))
            except (TypeError, ValueError):
                # TypeError: `other` is not an iterable of str (e.g. None);
                # ValueError: no digits were found, so int('') failed.
                return None

    int = _Int()
26
27
def prepare_searchresults(yt_results):
    """Extract search results from an innertube search response.

    Handles both response layouts read below: a continuation response
    (`onResponseReceivedCommands`) and a first page
    (`contents.twoColumnSearchResultsRenderer`).

    Returns a tuple `(items, extra, more)`: `items` and `extra` are the two
    lists produced by parse_result_items() for the first
    `itemSectionRenderer`, and `more` is the continuation token of the
    `continuationItemRenderer` (whatever the G lookup yields when absent).
    """
    contents = ( # from continuation token
        yt_results
            |G('onResponseReceivedCommands')
            |Select('appendContinuationItemsAction')
            |G('continuationItems')
    ) or ( # from page 1
        yt_results
            |G('contents')
            |G('twoColumnSearchResultsRenderer')
            |G('primaryContents')
            |G('sectionListRenderer')
            |G('contents')
    )
    items = contents|Select('itemSectionRenderer')|G('contents')
    # A response without an item section leaves nothing iterable here; pass
    # an empty list so parse_result_items() does not fail on the loop.
    items, extra = parse_result_items(items or [])
    more = (
        contents
            |Select("continuationItemRenderer")
            |G("continuationEndpoint")
            |G("continuationCommand")
            |G("token")
    )

    return items, extra, more
48
def prepare_channel(response, channel_id, channel_name):
    """Extract channel metadata and one page of channel items.

    Returns `(title, descr, thumb, items, more)`. `title` falls back to
    `channel_name`; `items` is the first list returned by
    parse_channel_items(); `more` is the continuation token for the next
    page. When the response carries no content section at all, `items` is
    `[]` and `more` is `False`.
    """
    channel_meta = response|G('metadata')|G('channelMetadataRenderer')
    microformat = response|G('microformat')|G('microformatDataRenderer')

    title = channel_meta|G('title') or microformat|G('title') or channel_name
    # microformat.description is capped at 160chars, so it is only the fallback
    descr = channel_meta|G('description') or microformat|G('description')
    # .avatar ~ 900px, hence the microformat thumbnail is tried first
    image = microformat|G('thumbnail') or channel_meta|G('avatar')
    thumb = mkthumbs(image|G('thumbnails') or {})

    contents = response|G('continuationContents')
    if not contents:
        contents = response|G('onResponseReceivedActions')
    if not contents: # overran end of list
        return title, descr, thumb, [], False

    # Probe the known container layouts one after the other; the first
    # non-empty one wins.
    unparsed = contents|G('gridContinuation')|G('items')
    if not unparsed:
        unparsed = contents|G('sectionListContinuation')|G('contents')
    if not unparsed:
        unparsed = contents|G('richGridContinuation')|G('contents')
    if not unparsed:
        unparsed = contents|Select('appendContinuationItemsAction')|G('continuationItems')
    if not unparsed:
        unparsed = contents|G(-1)|G('reloadContinuationItemsCommand')|G('continuationItems')
    if not unparsed:
        unparsed = []
    items, extra = parse_channel_items(unparsed, channel_id, title)

    # videos, livestreams
    more = (
        unparsed
            |Select('continuationItemRenderer')
            |G('continuationEndpoint')
            |G('continuationCommand')
            |G('token')
    )
    if not more: # playlists, search
        more = (
            contents
                |G('gridContinuation', 'sectionListContinuation')
                |G('continuations')
                |Select('nextContinuationData')
                |G('continuation')
        )

    return title, descr, thumb, items, more
85
def prepare_playlist(result):
    """Extract playlist metadata and one page of playlist videos.

    Returns `(title, author, channel_id, videos, more)`: `videos` holds the
    truthy results of parse_playlist() for each entry, `more` is the
    continuation token for the next page.

    `result['continuationContents']['playlistVideoListContinuation']` is
    accessed by subscript, so a response without it raises (KeyError for
    plain dicts).
    """
    continuation = result['continuationContents']
    raw_videos = continuation['playlistVideoListContinuation'].get('contents',[])
    more = (
        continuation
            |G('playlistVideoListContinuation')
            |G('continuations')
            |Select('nextContinuationData')
            |G('continuation')
    )

    sidebar = result|G('sidebar')|G('playlistSidebarRenderer')|G('items')
    primary = sidebar|Select('playlistSidebarPrimaryInfoRenderer')
    owner = (
        sidebar
            |Select('playlistSidebarSecondaryInfoRenderer')
            |G('videoOwner')
            |G('videoOwnerRenderer')
    )
    title = primary|G('title')|G.text
    author = owner|G('title')|G.text
    channel_id = owner|G('navigationEndpoint')|G('browseEndpoint')|G('browseId')

    # parse_playlist() returns None for unplayable entries; drop those.
    videos = [video for video in map(parse_playlist, raw_videos) if video]

    return title, author, channel_id, videos, more
106
def age(s):
    """Abbreviate a relative timestamp, e.g. "1 year ago" becomes "1y".

    The unit is shortened to its first letter, except minute(s), which
    becomes "min". A leading "Streamed " is dropped first.

    Returns None when `s` is None. Text that is not of the form
    "<value> <unit> <word>" is returned as-is (after removing "Streamed ")
    rather than raising, so one oddly worded timestamp cannot abort parsing
    of a whole result list.
    """
    if s is None: # missing from autogen'd music, some livestreams
        return None
    # Some livestreams have "Streamed 7 hours ago"
    s = s.replace("Streamed ","")
    # Now, everything should be in the form "1 year ago"
    parts = s.split(" ")
    if len(parts) != 3 or not parts[1]:
        # unexpected wording (wrong word count or empty unit): unpacking or
        # unit[0] would raise here, so hand the text back unchanged
        return s
    value, unit, _ = parts
    suffix = dict(
        minute='min',
        minutes='min',
    ).get(unit, unit[0]) # first letter otherwise (e.g. year(s) => y)

    return f"{value}{suffix}"
120
def parse_result_items(items):
    """
    parses youtube search response into an easier to use format.

    The first key of every element in `items` names its renderer type.
    Returns `(results, extras)`: `results` holds VIDEO/PLAYLIST/CHANNEL
    dicts, `extras` holds spelling corrections and messages. Shelves are
    flattened recursively; renderer types listed as ignored are skipped
    and anything else is passed to log_unknown_card().
    """
    # TODO: use .get() for most non-essential attributes
    video_kinds = ('videoRenderer', 'reelItemRenderer', 'gridVideoRenderer')
    # radio == "Mix" playlist, show == normal playlist, specially displayed
    playlist_kinds = ('playlistRenderer', 'radioRenderer', 'showRenderer')
    shelf_kinds = ('shelfRenderer', 'reelShelfRenderer')
    spelling_kinds = ('didYouMeanRenderer', 'showingResultsForRenderer', 'includingResultsForRenderer')
    ignored_kinds = (
        # movies to buy/rent; gMR.{videoId,title.runs[].text,lengthText.simpleText}
        'movieRenderer', 'gridMovieRenderer',
        # haha, no.
        'carouselAdRenderer', 'searchPyvRenderer', 'promotedSparklesTextSearchRenderer',
        'promotedSparklesWebRenderer', 'compactPromotedItemRenderer', 'adSlotRenderer',
        # suggested searches: .cards[].searchRefinementCardRenderer.query.runs[].text
        'horizontalCardListRenderer',
        'emergencyOneboxRenderer', # suicide prevention hotline
        'clarificationRenderer', 'infoPanelContainerRenderer', # COVID-19/conspiracy theory infos
        'webAnswerRenderer', # "Result from the web"
        'infoPanelContentRenderer', # "These results may be new or changing quickly"
        'hashtagTileRenderer', # link to '/hashtag/<search_query>'
    )

    results = []
    extras = []
    for item in items:
        key = next(iter(item.keys()), None)
        content = item[key]

        if key in video_kinds:
            video_id = content['videoId']
            title = content|G('title')|G.text or content|G('headline')|G.text
            author = content|G('longBylineText','shortBylineText')|G.text
            channel_id = (
                content|G('ownerText')|G('runs')|G(0)
                    |G('navigationEndpoint')|G('browseEndpoint')|G('browseId')
            ) or (
                content|G("channelThumbnailSupportedRenderers")
                    |G("channelThumbnailWithLinkRenderer")|G("navigationEndpoint")
                    |G("browseEndpoint")|G("browseId")
            )
            length = content|G('lengthText')|G.text # "44:07", "1:41:50"
            # "1,234 {views|watching}", absent on 0 views
            views = content|G('viewCountText')|G.text|A.int or 0
            published = content|G('publishedTimeText')|G('simpleText')|A(age)
            badge_style = content|G('badges')|Select('metadataBadgeRenderer')|G('style')
            results.append({'type': 'VIDEO', 'content': {
                'video_id': video_id,
                'title': title,
                'author': author,
                'channel_id': channel_id,
                'length': length,
                'views': views,
                'published': published,
                'live': badge_style == 'BADGE_STYLE_TYPE_LIVE_NOW',
            }})

        elif key in playlist_kinds:
            watch = content['navigationEndpoint']|G('watchEndpoint')
            playlist_id = watch|G('playlistId')
            video_id = watch|G('videoId')
            title = content['title']|G.text
            byline = content|G('longBylineText','shortBylineText')
            author = byline|G.text
            channel_id = (
                byline|G('runs')|G(0)
                    |G('navigationEndpoint')|G('browseEndpoint')|G('browseId')
            )
            n_videos = content|G('videoCount')|A.int
            if not n_videos: # "Mix" playlists
                n_videos = content|G('videoCountShortText','videoCountText')|G.text
            results.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': playlist_id,
                'video_id': video_id,
                'title': title,
                'author': author,
                'channel_id': channel_id,
                'n_videos': n_videos,
            }})

        elif key == 'channelRenderer':
            channel_id = content['channelId']
            title = content['title']|G.text
            icons = content['thumbnail']['thumbnails']|A(mkthumbs)
            subscribers = content|G('subscriberCountText')|G('simpleText') # "2.47K subscribers"
            results.append({'type': 'CHANNEL', 'content': {
                'channel_id': channel_id,
                'title': title,
                'icons': icons,
                'subscribers': subscribers,
            }})

        elif key in shelf_kinds:
            if key == 'shelfRenderer':
                shelf = content['content']
                layout = next(iter(shelf.keys()), None) #verticalListRenderer/horizontalMovieListRenderer
                nested = shelf[layout]['items']
            else: # reelShelfRenderer keeps its items directly
                nested = content['items']
            sub_results, sub_extras = parse_result_items(nested)
            results.extend(sub_results)
            extras.extend(sub_extras)

        elif key in ignored_kinds:
            continue

        elif key in spelling_kinds:
            extras.append({
                'type': 'spelling',
                'query': content['correctedQueryEndpoint']['searchEndpoint']['query'], # non-misspelled query
                # only "did you mean" is a mere suggestion; the other two were applied
                'autocorrected': key != 'didYouMeanRenderer',
            })

        elif key == 'messageRenderer': # "No more results"
            extras.append({
                'type': 'message',
                'message': content|G('title','text')|G.text,
            })

        elif key == 'backgroundPromoRenderer': # e.g. "no results"
            extras.append({
                'type': content['icon']['iconType'],
                'message': content['title']|G.text,
            })

        else:
            log_unknown_card(item)

    return results, extras
210
def parse_channel_items(items, channel_id, author):
    """Turn the items of a channel page into VIDEO/PLAYLIST dicts.

    The first key of every element in `items` names its renderer type.
    `channel_id` and `author` are used wherever an item does not carry its
    own. Container renderers are flattened recursively; ignored renderer
    types are skipped and anything else is passed to log_unknown_card().

    Returns `(result, extra)`; `extra` is only ever filled from recursive
    calls.
    """
    video_kinds = ("gridVideoRenderer", "videoRenderer", "videoCardRenderer", 'reelItemRenderer') # reel==youtube-shorts
    playlist_kinds = ("gridPlaylistRenderer", "playlistRenderer", "gridRadioRenderer")
    # containers that hold a list of further items, and the key it is stored under
    list_containers = {
        "itemSectionRenderer": 'contents',
        "gridRenderer": 'items',
        "horizontalCardListRenderer": 'cards',
        "horizontalListRenderer": 'items',
        "reelShelfRenderer": 'items',
    }
    # containers that wrap a single further item in .content
    single_containers = ("shelfRenderer", "richItemRenderer")
    ignored_kinds = (
        # e.g. {'messageRenderer': {'text': {'runs': [{'text': 'This channel has no playlists.'}]}}}
        "messageRenderer",
        "gameCardRenderer",
        "gridChannelRenderer", # don't care; related channels, e.g. on UCMsgXPD3wzzt8RxHJmXH7hQ
        'continuationItemRenderer', # handled in parent function
    )

    result = []
    extra = []
    for item in items:
        key = next(iter(item.keys()), None)
        content = item[key]

        if key in video_kinds:
            # only videoCardRenderer (topic channels) has author and channel, others fall back to supplied ones.
            video_id = content['videoId']
            title = content|G('title')|G.text or content|G('headline')|G.text
            byline = content|G('bylineText')
            video_author = byline|G.text or author
            video_channel = (
                byline|G('runs')
                    |Select('navigationEndpoint')
                    |G('browseEndpoint')|G('browseId')
            ) or channel_id
            length = content|G('lengthText')|G.text # topic channel
            if not length:
                length = (
                    content|G('thumbnailOverlays')
                        |Select('thumbnailOverlayTimeStatusRenderer')
                        |G('text')|G.text
                )
            # topic channel: .metadataText.simpleText = "22M views \u00b7 2 months ago"
            views = content|G('viewCountText')|G.text|A.int
            published = content|G('publishedTimeText')|G.text|A(age)
            result.append({'type': 'VIDEO', 'content': {
                'video_id': video_id,
                'title': title,
                'author': video_author,
                'channel_id': video_channel,
                'length': length,
                'views': views,
                'published': published,
            }})

        elif key in playlist_kinds:
            watch = content|G('navigationEndpoint')|G('watchEndpoint')
            playlist_id = watch|G('playlistId')
            video_id = watch|G('videoId')
            title = content|G('title')|G.text
            n_videos = content|G('videoCount')|A.int # playlistRenderer
            if not n_videos: # grid
                n_videos = content|G('videoCountShortText','videoCountText')|G.text|A.int
            result.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': playlist_id,
                'video_id': video_id,
                'title': title,
                'author': author, # Note: gridRadioRenderer is by 'Youtube' without channel_id, ignoring that.
                'channel_id': channel_id,
                'n_videos': n_videos,
            }})

        elif key == "showRenderer":
            watch = content['navigationEndpoint']['watchEndpoint']
            playlist_id = watch['playlistId']
            video_id = watch['videoId']
            title = content['title']['simpleText']
            result.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': playlist_id,
                'video_id': video_id,
                'title': title,
                'author': author,
                'channel_id': channel_id,
                'n_videos': None,
            }})

        elif key == "gridShowRenderer":
            browse_id = content|G('navigationEndpoint')|G('browseEndpoint')|G('browseId')
            # playlistId prefixed with 'VL', which must be removed
            playlist_id = browse_id[2:]
            title = content|G('title')|G.text
            n_videos = (
                content|G('thumbnailOverlays')|G(0)
                    |G('thumbnailOverlayBottomPanelRenderer')|G('text')|G.text
            )
            result.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': playlist_id,
                'video_id': None,
                'title': title,
                'author': author,
                'channel_id': channel_id,
                'n_videos': n_videos,
            }})

        elif key in list_containers or key in single_containers:
            if key in list_containers:
                nested = content[list_containers[key]]
            else:
                nested = [content['content']]
            sub_result, sub_extra = parse_channel_items(nested, channel_id, author)
            result.extend(sub_result)
            extra.extend(sub_extra)

        elif key in ignored_kinds:
            continue

        else:
            log_unknown_card(item)

    return result, extra
296
def parse_playlist(item):
    """Convert one playlist entry into a VIDEO dict.

    The first key of `item` names its renderer type. Only
    "playlistVideoRenderer" is supported; any other type raises
    Exception(item). Returns None for entries whose `isPlayable` is missing
    or false.
    """
    key = next(iter(item.keys()), None)
    content = item[key]
    if key != "playlistVideoRenderer":
        raise Exception(item) # XXX TODO

    if not content.get('isPlayable', False):
        return None # private or deleted video

    video_id = content['videoId']

    title_obj = content['title']
    title = title_obj.get('simpleText') # playable videos
    if not title:
        title = title_obj.get('runs',[{}])[0].get('text') # "[Private video]"

    watch = content['navigationEndpoint']['watchEndpoint']
    playlist_id = watch['playlistId']
    #or int(content['index']['simpleText']) (absent on course intros; e.g. PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5)
    index = watch.get('index',0)

    # rest is missing from unplayable videos:
    byline = content.get('shortBylineText',{}).get('runs',[{}])[0]
    author = byline.get('text')
    channel_id = byline.get('navigationEndpoint',{}).get('browseEndpoint',{}).get('browseId')

    length = content.get("lengthText",{}).get("simpleText") # "8:51"
    if not length:
        length = int(content.get("lengthSeconds", 0)) # "531"

    starttime = watch.get('startTimeSeconds')

    return {'type': 'VIDEO', 'content': {
        'video_id': video_id,
        'title': title,
        'playlist_id': playlist_id,
        'index': index,
        'author': author,
        'channel_id': channel_id,
        'length': length,
        'starttime': starttime,
    }}
Imprint / Impressum