]> git.gir.st - subscriptionfeed.git/blob - app/browse/innertube.py
split common.innertube into youtube.cards and browse.innertube
[subscriptionfeed.git] / app / browse / innertube.py
1 # functions that deal with parsing data from youtube's internal API ("innertube")
2
3 from ..common.common import mkthumbs, log_unknown_card, G
4
class Select:
    """Pipe stage that picks values out of an iterable of dicts.

    ``items | Select('foo')`` hands the list of all ``foo`` values to ``G(0)``
    (i.e. the first ``foo`` in the list); ``items | Select(all='foo')``
    returns the values of every element that has a ``foo`` key, as a list.
    """

    def __init__(self, key=None, *, all=None):
        # `all` doubles as the key when no positional key is given; its
        # truthiness later selects between "first match" and "all matches".
        self.key = key or all
        self.all = all

    def __ror__(self, other):
        """Evaluate ``other | self``.

        If `other` cannot be walked as an iterable of mappings (for example
        it is None, or one element has no ``.keys()``), the whole selection
        is treated as having no matches.
        """
        try:
            items = [
                entry[self.key]
                for entry in other
                if self.key in entry.keys()
            ]
        except Exception:
            # Deliberately best-effort, but no longer a bare `except:` so
            # KeyboardInterrupt/SystemExit still propagate.
            items = []
        return items if self.all else items|G(0)
class A:
    """Pipe stage that applies a function: ``x | A(f, *args)`` is ``f(x, *args)``.

    ``A.int`` is a ready-made stage that pulls an integer out of a string,
    e.g. ``"1,234 views" | A.int`` gives ``1234``.
    """

    def __init__(self, f, *args):
        self.f = f
        self.args = args

    def __ror__(self, other):
        """Evaluate ``other | self`` by calling ``f(other, *args)``."""
        return self.f(other, *self.args)

    class _Int:
        """Stage that keeps only the digit characters and converts them to int."""

        def __ror__(self, other):
            """Return the digits of `other` as an int, or None if impossible.

            None is returned when `other` is not an iterable of strings
            (TypeError) or contains no usable digits (ValueError).
            """
            # `int` here is the builtin: the class attribute `A.int` below is
            # not visible from inside a method body.
            try:
                return int(''.join(filter(str.isdigit, other)))
            except (TypeError, ValueError):
                return None

    int = _Int()
26
27
def prepare_searchresults(yt_results):
    """Extract result cards and the continuation token from a search response.

    Handles both response layouts: the continuation layout
    (``onResponseReceivedCommands``) and the first-page layout
    (``twoColumnSearchResultsRenderer``).

    Returns a tuple ``(items, extra, more)``: the two lists produced by
    ``parse_result_items`` and the continuation token for the next page.
    NOTE(review): `more` is presumably falsy when there is no next page --
    that depends on what the imported ``G`` returns for a missing key.
    """
    contents = (  # from continuation token
        yt_results
        |G('onResponseReceivedCommands')
        |Select('appendContinuationItemsAction')
        |G('continuationItems')
    ) or (  # from page 1
        yt_results
        |G('contents')
        |G('twoColumnSearchResultsRenderer')
        |G('primaryContents')
        |G('sectionListRenderer')
        |G('contents')
    )
    items = contents|Select('itemSectionRenderer')|G('contents')
    # A response without an itemSectionRenderer must not hand a non-iterable
    # to the parser; fall back to an empty list, as prepare_channel does.
    items, extra = parse_result_items(items or [])
    more = (
        contents
        |Select("continuationItemRenderer")
        |G("continuationEndpoint")
        |G("continuationCommand")
        |G("token")
    )

    return items, extra, more
48
def prepare_channel(response, channel_id, channel_name):
    """Extract channel metadata, parsed items and the continuation token.

    Returns ``(title, descr, thumb, items, more)``. When the response holds
    neither ``continuationContents`` nor ``onResponseReceivedActions`` (the
    end of the list was overrun), `items` is ``[]`` and `more` is ``False``.
    `channel_name` is only used as a fallback title.
    """
    channel_meta = response|G('metadata')|G('channelMetadataRenderer')
    microformat = response|G('microformat')|G('microformatDataRenderer')

    title = channel_meta|G('title') or microformat|G('title') or channel_name
    # microformat's description is capped at 160 chars, so it is the fallback
    descr = channel_meta|G('description') or microformat|G('description')
    # channel_meta's avatar is ~900px, hence only the fallback
    thumb_source = microformat|G('thumbnail') or channel_meta|G('avatar')
    thumb = mkthumbs(thumb_source|G('thumbnails') or {})

    contents = (
        response|G('continuationContents')
        or response|G('onResponseReceivedActions')
    )
    if not contents:  # overran end of list
        return title, descr, thumb, [], False

    # The item list lives in a different place depending on the response type.
    unparsed = (
        contents|G('gridContinuation')|G('items')
        or contents|G('sectionListContinuation')|G('contents')
        or contents|G('richGridContinuation')|G('contents')
        or contents|Select('appendContinuationItemsAction')|G('continuationItems')
        or contents|G(-1)|G('reloadContinuationItemsCommand')|G('continuationItems')
        or []
    )
    items, _extra = parse_channel_items(unparsed, channel_id, title)

    # videos, livestreams: the token is one of the items themselves
    token_from_items = (
        unparsed
        |Select('continuationItemRenderer')
        |G('continuationEndpoint')
        |G('continuationCommand')
        |G('token')
    )
    # playlists, search: the token sits next to the item list
    more = token_from_items or (
        contents
        |G('gridContinuation', 'sectionListContinuation')
        |G('continuations')
        |Select('nextContinuationData')
        |G('continuation')
    )

    return title, descr, thumb, items, more
85
def prepare_playlist(result):
    """Extract playlist metadata, its videos and the continuation token.

    Returns ``(title, author, channel_id, videos, more)``. `videos` holds the
    truthy results of ``parse_playlist`` (entries it returns None for are
    dropped). Raises KeyError when ``continuationContents`` or its
    ``playlistVideoListContinuation`` is missing from `result`.
    """
    contents = result['continuationContents']
    video_list = contents['playlistVideoListContinuation']
    unparsed = video_list.get('contents', [])
    more = (
        contents
        |G('playlistVideoListContinuation')
        |G('continuations')
        |Select('nextContinuationData')
        |G('continuation')
    )

    sidebar_items = result|G('sidebar')|G('playlistSidebarRenderer')|G('items')
    primary_info = sidebar_items|Select('playlistSidebarPrimaryInfoRenderer')
    owner = (
        sidebar_items
        |Select('playlistSidebarSecondaryInfoRenderer')
        |G('videoOwner')
        |G('videoOwnerRenderer')
    )
    title = primary_info|G('title')|G.text
    author = owner|G('title')|G.text
    channel_id = owner|G('navigationEndpoint')|G('browseEndpoint')|G('browseId')

    videos = [entry for entry in map(parse_playlist, unparsed) if entry]
    return title, author, channel_id, videos, more
106
def age(s):
    """Shorten a relative timestamp such as "1 year ago" to "1y".

    A leading "Streamed " (seen on some livestreams) is dropped first.
    "minute"/"minutes" become "min"; every other unit is reduced to its first
    letter (e.g. year(s) => y).

    Returns None when `s` is None (missing from autogen'd music and some
    livestreams) or when it does not have the "<value> <unit> ago" shape, so
    that a single unexpected timestamp cannot abort parsing of a whole list.
    """
    if s is None:
        return None
    # Some livestreams have "Streamed 7 hours ago"
    parts = s.replace("Streamed ", "").split(" ")
    # Anything but exactly three space-separated words with a non-empty unit
    # used to raise ValueError/IndexError; report it as "unknown" instead.
    if len(parts) != 3 or not parts[1]:
        return None
    value, unit, _ = parts
    suffix = {'minute': 'min', 'minutes': 'min'}.get(unit, unit[0])

    return f"{value}{suffix}"
120
def parse_result_items(items):
    """Parse youtube search result cards into an easier to use format.

    Each element of `items` is expected to be a dict whose first key names
    the renderer type. Returns ``(results, extras)``:

    * `results`: dicts of the form ``{'type': ..., 'content': {...}}`` with
      type 'VIDEO', 'PLAYLIST' or 'CHANNEL'; shelf renderers are flattened
      into this list recursively.
    * `extras`: spelling corrections and messages.

    Known-but-unwanted card types are dropped silently; anything else,
    including an empty item, is passed to ``log_unknown_card``.
    """
    # TODO: use .get() for most non-essential attributes
    results = []
    extras = []
    for item in items:
        key = next(iter(item.keys()), None)
        # .get(): an empty item (key is None) must reach the log_unknown_card
        # fallback below instead of raising KeyError(None).
        content = item.get(key)
        if key in ['videoRenderer', 'reelItemRenderer']:
            results.append({'type': 'VIDEO', 'content': {
                'video_id': content['videoId'],
                'title': content|G('title')|G.text or content|G('headline')|G.text,
                'author': content|G('longBylineText','shortBylineText')|G.text,
                'channel_id': (content|G('ownerText')|G('runs')|G(0)
                    |G('navigationEndpoint')|G('browseEndpoint')|G('browseId')),
                'length': content|G('lengthText')|G.text,  # "44:07", "1:41:50"
                # "1,234 {views|watching}", absent on 0 views
                'views': content|G('viewCountText')|G.text|A.int or 0,
                'published': content|G('publishedTimeText')|G('simpleText')|A(age),
                'live': content|G('badges')|Select('metadataBadgeRenderer')|G('style')=='BADGE_STYLE_TYPE_LIVE_NOW',
            }})
        elif key in ['playlistRenderer', 'radioRenderer', 'showRenderer']:
            # radio == "Mix" playlist, show == normal playlist, specially displayed
            results.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': content['navigationEndpoint']|G('watchEndpoint')|G('playlistId'),
                'video_id': content['navigationEndpoint']|G('watchEndpoint')|G('videoId'),
                'title': content['title']|G.text,
                'author': content|G('longBylineText','shortBylineText')|G.text,
                'channel_id': (content|G('longBylineText','shortBylineText')|G('runs')|G(0)
                    |G('navigationEndpoint')|G('browseEndpoint')|G('browseId')),
                'n_videos': (content|G('videoCount')|A.int or
                    content|G('videoCountShortText','videoCountText')|G.text),  # "Mix" playlists
            }})
        elif key == 'channelRenderer':
            results.append({'type': 'CHANNEL', 'content': {
                'channel_id': content['channelId'],
                'title': content['title']|G.text,
                'icons': content['thumbnail']['thumbnails']|A(mkthumbs),
                'subscribers': content|G('subscriberCountText')|G('simpleText'),  # "2.47K subscribers"
            }})
        elif key == 'shelfRenderer':
            # subkey: verticalListRenderer/horizontalMovieListRenderer
            subkey = next(iter(content['content'].keys()), None)
            r, e = parse_result_items(content['content'][subkey]['items'])
            results.extend(r)
            extras.extend(e)
        elif key in ["reelShelfRenderer"]:
            r, e = parse_result_items(content['items'])
            results.extend(r)
            extras.extend(e)
        elif key in ['movieRenderer', 'gridMovieRenderer']:  # movies to buy/rent
            pass  # gMR.{videoId,title.runs[].text,lengthText.simpleText}
        elif key in ['carouselAdRenderer','searchPyvRenderer','promotedSparklesTextSearchRenderer',
                'promotedSparklesWebRenderer','compactPromotedItemRenderer', 'adSlotRenderer']:
            pass  # ads: deliberately dropped
        elif key == 'horizontalCardListRenderer':
            # suggested searches: .cards[].searchRefinementCardRenderer.query.runs[].text
            pass
        elif key == 'emergencyOneboxRenderer':  # suicide prevention hotline
            pass
        elif key in ['clarificationRenderer', 'infoPanelContainerRenderer']:  # COVID-19/conspiracy theory infos
            pass
        elif key == 'webAnswerRenderer':  # "Result from the web"
            pass
        elif key == 'infoPanelContentRenderer':  # "These results may be new or changing quickly"
            pass
        elif key == 'hashtagTileRenderer':  # link to '/hashtag/<search_query>'
            pass
        elif key in ['didYouMeanRenderer', 'showingResultsForRenderer', 'includingResultsForRenderer']:
            extras.append({
                'type': 'spelling',
                'query': content['correctedQueryEndpoint']['searchEndpoint']['query'],  # non-misspelled query
                'autocorrected': key in ['showingResultsForRenderer', 'includingResultsForRenderer'],
            })
        elif key == 'messageRenderer':  # "No more results"
            extras.append({
                'type': 'message',
                'message': content|G('title','text')|G.text,
            })
        elif key == 'backgroundPromoRenderer':  # e.g. "no results"
            extras.append({
                'type': content['icon']['iconType'],
                'message': content['title']|G.text,
            })
        else:
            log_unknown_card(item)
    return results, extras
207
def parse_channel_items(items, channel_id, author):
    """Parse the cards of a channel page into an easier to use format.

    Each element of `items` is expected to be a dict whose first key names
    the renderer type. `channel_id` and `author` are the fallback values for
    cards that do not carry their own (only videoCardRenderer, used on topic
    channels, has both).

    Returns ``(result, extra)``. `result` holds dicts of the form
    ``{'type': 'VIDEO'|'PLAYLIST', 'content': {...}}``; container renderers
    are flattened recursively. `extra` is only ever filled by those recursive
    calls. Unknown card types, including an empty item, are passed to
    ``log_unknown_card``.
    """
    result = []
    extra = []
    for item in items:
        key = next(iter(item.keys()), None)
        # .get(): an empty item (key is None) must reach the log_unknown_card
        # fallback below instead of raising KeyError(None).
        content = item.get(key)
        if key in ["gridVideoRenderer", "videoRenderer", "videoCardRenderer", 'reelItemRenderer']:
            # reel == youtube-shorts
            # only videoCardRenderer (topic channels) has author and channel,
            # others fall back to supplied ones.
            result.append({'type': 'VIDEO', 'content': {
                'video_id': content['videoId'],
                'title': content|G('title')|G.text or content|G('headline')|G.text,
                'author': content|G('bylineText')|G.text or author,
                'channel_id': (content|G('bylineText')|G('runs')
                    |Select('navigationEndpoint')
                    |G('browseEndpoint')|G('browseId') or channel_id),
                'length': (content|G('lengthText')|G.text or  # topic channel
                    content|G('thumbnailOverlays')
                    |Select('thumbnailOverlayTimeStatusRenderer')
                    |G('text')|G.text),
                # topic channel: .metadataText.simpleText = "22M views \u00b7 2 months ago"
                'views': content|G('viewCountText')|G.text|A.int,
                'published': content|G('publishedTimeText')|G.text|A(age),
            }})
        elif key in ["gridPlaylistRenderer", "playlistRenderer", "gridRadioRenderer"]:
            result.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': content|G('navigationEndpoint')|G('watchEndpoint')|G('playlistId'),
                'video_id': content|G('navigationEndpoint')|G('watchEndpoint')|G('videoId'),
                'title': content|G('title')|G.text,
                # Note: gridRadioRenderer is by 'Youtube' without channel_id, ignoring that.
                'author': author,
                'channel_id': channel_id,
                'n_videos': (content|G('videoCount')|A.int or  # playlistRenderer
                    content|G('videoCountShortText','videoCountText')|G.text|A.int),  # grid
            }})
        elif key == "showRenderer":
            result.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': content['navigationEndpoint']['watchEndpoint']['playlistId'],
                'video_id': content['navigationEndpoint']['watchEndpoint']['videoId'],
                'title': content['title']['simpleText'],
                'author': author,
                'channel_id': channel_id,
                'n_videos': None,
            }})
        elif key in ["gridShowRenderer"]:
            result.append({'type': 'PLAYLIST', 'content': {
                # playlistId is prefixed with 'VL', which must be removed
                'playlist_id': (content|G('navigationEndpoint')
                    |G('browseEndpoint')|G('browseId'))[2:],
                'video_id': None,
                'title': content|G('title')|G.text,
                'author': author,
                'channel_id': channel_id,
                'n_videos': (content|G('thumbnailOverlays')|G(0)
                    |G('thumbnailOverlayBottomPanelRenderer')|G('text')|G.text),
            }})
        elif key in ["itemSectionRenderer", "gridRenderer", "horizontalCardListRenderer", "horizontalListRenderer"]:
            # containers: recurse into whichever attribute holds their children
            newkey = {
                "itemSectionRenderer": 'contents',
                "gridRenderer": 'items',
                "horizontalCardListRenderer": 'cards',
                "horizontalListRenderer": 'items',
            }.get(key)
            r, e = parse_channel_items(content[newkey], channel_id, author)
            result.extend(r)
            extra.extend(e)
        elif key in ["shelfRenderer", "richItemRenderer"]:
            # wrappers around a single child card
            r, e = parse_channel_items([content['content']], channel_id, author)
            result.extend(r)
            extra.extend(e)
        elif key in ["reelShelfRenderer"]:
            r, e = parse_channel_items(content['items'], channel_id, author)
            result.extend(r)
            extra.extend(e)
        elif key == "messageRenderer":
            # e.g. {'messageRenderer': {'text': {'runs': [{'text': 'This channel has no playlists.'}]}}}
            pass
        elif key == "gameCardRenderer":
            pass
        elif key == "gridChannelRenderer":
            pass  # don't care; related channels, e.g. on UCMsgXPD3wzzt8RxHJmXH7hQ
        elif key == 'continuationItemRenderer':  # handled in parent function
            pass
        else:
            log_unknown_card(item)

    return result, extra
293
def _first_run(text):
    """Return the first entry of ``text['runs']``, or ``{}`` if there is none."""
    runs = text.get('runs') or [{}]
    return runs[0]


def parse_playlist(item):
    """Parse one entry of a playlist's video list.

    Returns ``{'type': 'VIDEO', 'content': {...}}`` for a playable
    ``playlistVideoRenderer``, or None when the entry is not playable
    (private or deleted video).

    Raises Exception (carrying `item`) for any other renderer type, including
    an empty item, and KeyError when a playable entry lacks ``videoId``,
    ``title`` or the watch endpoint's ``playlistId``.
    """
    key = next(iter(item.keys()), None)
    # .get(): an empty item (key is None) should raise the same "unknown
    # item" exception as any other unexpected type, not KeyError(None).
    content = item.get(key)
    if key != "playlistVideoRenderer":
        raise Exception(item)  # XXX TODO

    if not content.get('isPlayable', False):
        return None  # private or deleted video

    video_id = content['videoId']
    title = content['title']
    watch = content['navigationEndpoint']['watchEndpoint']
    # rest is missing from unplayable videos:
    byline = _first_run(content.get('shortBylineText', {}))

    return {'type': 'VIDEO', 'content': {
        'video_id': video_id,
        'title': (title.get('simpleText') or  # playable videos
                  _first_run(title).get('text')),  # "[Private video]"
        'playlist_id': watch['playlistId'],
        # or int(content['index']['simpleText']) (absent on course intros;
        # e.g. PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5)
        'index': watch.get('index', 0),
        'author': byline.get('text'),
        'channel_id': (byline.get('navigationEndpoint', {})
                       .get('browseEndpoint', {}).get('browseId')),
        'length': (content.get("lengthText", {}).get("simpleText") or  # "8:51"
                   int(content.get("lengthSeconds", 0))),  # "531"
        'starttime': watch.get('startTimeSeconds'),
    }}
Imprint / Impressum