]> git.gir.st - subscriptionfeed.git/blob - app/browse/innertube.py
browse: support shortened video age labels
[subscriptionfeed.git] / app / browse / innertube.py
1 # functions that deal with parsing data from youtube's internal API ("innertube")
2
3 from ..common.common import mkthumbs, log_unknown_card, G
4
class Select:
    """Pipe helper that plucks values by key out of a list of dicts.

    ``data|Select('foo')`` returns the first 'foo' value found in the list,
    ``data|Select(all='foo')`` returns the 'foo' values of all elements that
    have one, as a list.
    """

    def __init__(self, key=None, *, all=None):
        # `all` doubles as the lookup key and as the "return every match" flag.
        self.key = key or all
        self.all = all

    def __ror__(self, other):
        """Apply the selection to the left-hand operand of ``|``.

        Input that is not an iterable of mappings (e.g. None) is treated as
        having no matches instead of raising.
        """
        try:
            items = [entry[self.key] for entry in other if self.key in entry.keys()]
        except Exception:
            # Deliberately best-effort, but no longer swallows
            # KeyboardInterrupt/SystemExit like a bare `except:` would.
            items = []
        # The "first match" case is delegated to G(0) (imported helper).
        return items if self.all else items|G(0)
class A:
    """Pipe helper that applies a function to the left-hand operand.

    ``value|A(f, *args)`` evaluates to ``f(value, *args)``.
    ``value|A.int`` evaluates to the integer formed by all digit characters
    of ``value``, or None if that is not possible.
    """

    def __init__(self, f, *args):
        self.f = f
        self.args = args

    def __ror__(self, other):
        return self.f(other, *self.args)

    class _Int:
        """Implements ``value|A.int``: keep only the digits, convert to int."""

        def __ror__(self, other):
            # `int` below is the builtin: class-level names of `A` are not
            # visible from inside a method body.
            try:
                return int(''.join(filter(str.isdigit, other)))
            except (TypeError, ValueError):
                # TypeError: `other` is None or not an iterable of strings.
                # ValueError: no (convertible) digits were found.
                return None

    int = _Int()
26
27
def prepare_searchresults(yt_results):
    """Extract result items from an innertube search response.

    Handles both the first page and responses fetched with a continuation
    token.

    Returns a tuple ``(items, extra, more)``: ``items`` and ``extra`` are the
    two lists produced by parse_result_items(), ``more`` is whatever the
    continuation-token lookup yields (used to request the next page).
    """
    contents = (  # from continuation token
        yt_results
        |G('onResponseReceivedCommands')
        |Select('appendContinuationItemsAction')
        |G('continuationItems')
    ) or (  # from page 1
        yt_results
        |G('contents')
        |G('twoColumnSearchResultsRenderer')
        |G('primaryContents')
        |G('sectionListRenderer')
        |G('contents')
    )
    items = contents|Select('itemSectionRenderer')|G('contents')
    items, extra = parse_result_items(items)
    more = (
        contents
        |Select("continuationItemRenderer")
        |G("continuationEndpoint")
        |G("continuationCommand")
        |G("token")
    )
    # NOTE: yt_results also carries "estimatedResults"; it was previously read
    # into an unused local, which has been dropped.

    return items, extra, more
48
def prepare_channel(response, channel_id, channel_name):
    """Extract channel metadata and one page of items from a channel response.

    Returns ``(title, descr, thumb, items, more)``. When the response carries
    neither 'continuationContents' nor 'onResponseReceivedActions', ``items``
    is an empty list and ``more`` is False. Otherwise ``items`` comes from
    parse_channel_items() and ``more`` is the continuation lookup result.
    """
    channel_meta = response|G('metadata')|G('channelMetadataRenderer')
    microformat = response|G('microformat')|G('microformatDataRenderer')

    title = channel_meta|G('title') or microformat|G('title') or channel_name
    # the microformat description is capped at 160chars, so it is only a fallback
    descr = channel_meta|G('description') or microformat|G('description')
    # channelMetadataRenderer's .avatar is ~ 900px; only used as fallback
    thumb_source = microformat|G('thumbnail') or channel_meta|G('avatar')
    thumb = mkthumbs(thumb_source|G('thumbnails') or {})

    contents = response|G('continuationContents') or response|G('onResponseReceivedActions')
    if not contents:  # overran end of list
        return title, descr, thumb, [], False

    unparsed = (
        contents|G('gridContinuation')|G('items')
        or contents|G('sectionListContinuation')|G('contents')
        or contents|G('richGridContinuation')|G('contents')
        or contents|Select('appendContinuationItemsAction')|G('continuationItems')
        or contents|G(-1)|G('reloadContinuationItemsCommand')|G('continuationItems')
        or []
    )
    items, _extra = parse_channel_items(unparsed, channel_id, title)

    # videos, livestreams: the token is among the unparsed items
    more = (
        unparsed
        |Select('continuationItemRenderer')
        |G('continuationEndpoint')
        |G('continuationCommand')
        |G('token')
    )
    if not more:
        # playlists, search: the token is next to the item list
        more = (
            contents
            |G('gridContinuation', 'sectionListContinuation')
            |G('continuations')
            |Select('nextContinuationData')
            |G('continuation')
        )

    return title, descr, thumb, items, more
85
def prepare_playlist(result):
    """Extract playlist metadata and one page of videos from a playlist response.

    Returns ``(title, author, channel_id, videos, more)``, where ``videos``
    holds the truthy results of parse_playlist() (it returns None for
    unplayable entries) and ``more`` is the continuation lookup result.

    Raises KeyError if 'continuationContents' or its
    'playlistVideoListContinuation' entry is missing.
    """
    continuation = result['continuationContents']
    raw_videos = continuation['playlistVideoListContinuation'].get('contents', [])
    more = (
        continuation
        |G('playlistVideoListContinuation')
        |G('continuations')
        |Select('nextContinuationData')
        |G('continuation')
    )

    sidebar_items = result|G('sidebar')|G('playlistSidebarRenderer')|G('items')
    primary_info = sidebar_items|Select('playlistSidebarPrimaryInfoRenderer')
    owner = (
        sidebar_items
        |Select('playlistSidebarSecondaryInfoRenderer')
        |G('videoOwner')
        |G('videoOwnerRenderer')
    )
    title = primary_info|G('title')|G.text
    author = owner|G('title')|G.text
    channel_id = owner|G('navigationEndpoint')|G('browseEndpoint')|G('browseId')

    videos = [video for video in map(parse_playlist, raw_videos) if video]
    return title, author, channel_id, videos, more
106
def age(s):
    """Shorten a relative-age label, e.g. "1 year ago" => "1y".

    Accepts the long form ("3 minutes ago"), the shortened form ("3min ago",
    "2mo ago") and either of them prefixed with "Streamed ". Minutes are
    abbreviated as "min"; every other unit is reduced to its first letter.

    Returns None if `s` is None or does not match either form (previously
    such labels raised ValueError/IndexError).
    """
    if s is None:  # missing from autogen'd music, some livestreams
        return None
    # Some livestreams have "Streamed 7 hours ago"
    parts = s.replace("Streamed ", "").split(" ")
    if len(parts) == 3:
        # Now, everything should be in the form "1 year ago"
        value, unit, _ = parts
    elif len(parts) == 2:
        # '<n>{y|mo|d|h|min} ago' => https://github.com/TeamNewPipe/NewPipeExtractor/issues/1067
        value_and_unit = parts[0]
        value = ''.join(filter(str.isdigit, value_and_unit))
        unit = value_and_unit.replace(value, "")
    else:
        return None  # unknown label format
    if not unit:
        return None  # nothing to abbreviate

    if unit in ("min", "minute", "minutes"):
        suffix = "min"
    else:
        suffix = unit[0]  # first letter otherwise (e.g. year(s) => y)

    return f"{value}{suffix}"
127
# Search-result renderers that are recognised but intentionally dropped.
_IGNORED_RESULT_RENDERERS = frozenset({
    # movies to buy/rent; gMR.{videoId,title.runs[].text,lengthText.simpleText}
    'movieRenderer', 'gridMovieRenderer',
    # ad/promoted cards: haha, no.
    'carouselAdRenderer', 'searchPyvRenderer', 'promotedSparklesTextSearchRenderer',
    'promotedSparklesWebRenderer', 'compactPromotedItemRenderer', 'adSlotRenderer',
    # suggested searches: .cards[].searchRefinementCardRenderer.query.runs[].text
    'horizontalCardListRenderer',
    'emergencyOneboxRenderer',  # suicide prevention hotline
    'clarificationRenderer', 'infoPanelContainerRenderer',  # COVID-19/conspiracy theory infos
    'webAnswerRenderer',  # "Result from the web"
    'infoPanelContentRenderer',  # "These results may be new or changing quickly"
    'hashtagTileRenderer',  # link to '/hashtag/<search_query>'
})


def parse_result_items(items):
    """
    parses youtube search response into an easier to use format.

    `items` is a list of single-key dicts ({<renderer name>: <content>});
    None and empty dicts are tolerated and skipped.

    Returns a tuple ``(results, extras)``:
    - results: dicts of the form {'type': 'VIDEO'|'PLAYLIST'|'CHANNEL', 'content': {...}}
    - extras: spelling corrections and messages, each a dict with a 'type' key.
    Shelf renderers are flattened recursively; unknown renderers are passed to
    log_unknown_card().
    """
    # TODO: use .get() for most non-essential attributes
    results = []
    extras = []
    for item in items or []:  # a failed upstream lookup may hand us None
        key = next(iter(item), None)
        if key is None:  # empty card: nothing to parse
            continue
        content = item[key]
        if key in ('videoRenderer', 'reelItemRenderer', 'gridVideoRenderer'):
            results.append({'type': 'VIDEO', 'content': {
                'video_id': content['videoId'],
                'title': content|G('title')|G.text or content|G('headline')|G.text,
                'author': content|G('longBylineText','shortBylineText')|G.text,
                'channel_id': (
                    content|G('ownerText')|G('runs')|G(0)
                        |G('navigationEndpoint')|G('browseEndpoint')|G('browseId')
                    or content|G("channelThumbnailSupportedRenderers")
                        |G("channelThumbnailWithLinkRenderer")|G("navigationEndpoint")
                        |G("browseEndpoint")|G("browseId")
                ),
                'length': content|G('lengthText')|G.text,  # "44:07", "1:41:50"
                'views': content|G('viewCountText')|G.text|A.int or 0,  # "1,234 {views|watching}", absent on 0 views
                'published': content|G('publishedTimeText')|G('simpleText')|A(age),
                'live': content|G('badges')|Select('metadataBadgeRenderer')|G('style')=='BADGE_STYLE_TYPE_LIVE_NOW',
            }})
        elif key in ('playlistRenderer', 'radioRenderer', 'showRenderer'):
            # radio == "Mix" playlist, show == normal playlist, specially displayed
            results.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': content['navigationEndpoint']|G('watchEndpoint')|G('playlistId'),
                'video_id': content['navigationEndpoint']|G('watchEndpoint')|G('videoId'),
                'title': content['title']|G.text,
                'author': content|G('longBylineText','shortBylineText')|G.text,
                'channel_id': (
                    content|G('longBylineText','shortBylineText')|G('runs')|G(0)
                        |G('navigationEndpoint')|G('browseEndpoint')|G('browseId')
                ),
                'n_videos': (
                    content|G('videoCount')|A.int
                    or content|G('videoCountShortText','videoCountText')|G.text  # "Mix" playlists
                ),
            }})
        elif key == 'channelRenderer':
            results.append({'type': 'CHANNEL', 'content': {
                'channel_id': content['channelId'],
                'title': content['title']|G.text,
                'icons': content['thumbnail']['thumbnails']|A(mkthumbs),
                'subscribers': content|G('subscriberCountText')|G('simpleText'),  # "2.47K subscribers"
            }})
        elif key == 'shelfRenderer':
            shelf = content['content']
            subkey = next(iter(shelf.keys()), None)  # verticalListRenderer/horizontalMovieListRenderer
            r, e = parse_result_items(shelf[subkey]['items'])
            results.extend(r)
            extras.extend(e)
        elif key == 'reelShelfRenderer':
            r, e = parse_result_items(content['items'])
            results.extend(r)
            extras.extend(e)
        elif key in _IGNORED_RESULT_RENDERERS:
            pass
        elif key in ('didYouMeanRenderer', 'showingResultsForRenderer', 'includingResultsForRenderer'):
            extras.append({
                'type': 'spelling',
                'query': content['correctedQueryEndpoint']['searchEndpoint']['query'],  # non-misspelled query
                'autocorrected': key in ('showingResultsForRenderer', 'includingResultsForRenderer'),
            })
        elif key == 'messageRenderer':  # "No more results"
            extras.append({
                'type': 'message',
                'message': content|G('title','text')|G.text,
            })
        elif key == 'backgroundPromoRenderer':  # e.g. "no results"
            extras.append({
                'type': content['icon']['iconType'],
                'message': content['title']|G.text,
            })
        else:
            log_unknown_card(item)
    return results, extras
217
# Container renderers on channel pages, mapped to the key holding their children.
_CHANNEL_CONTAINER_KEYS = {
    "itemSectionRenderer": 'contents',
    "gridRenderer": 'items',
    "horizontalCardListRenderer": 'cards',
    "horizontalListRenderer": 'items',
}

# Channel-page renderers that are recognised but intentionally dropped.
_IGNORED_CHANNEL_RENDERERS = frozenset({
    # e.g. {'messageRenderer': {'text': {'runs': [{'text': 'This channel has no playlists.'}]}}}
    "messageRenderer",
    "gameCardRenderer",
    "gridChannelRenderer",  # don't care; related channels, e.g. on UCMsgXPD3wzzt8RxHJmXH7hQ
    "continuationItemRenderer",  # handled in parent function
})


def parse_channel_items(items, channel_id, author):
    """Parse the items of a channel page into an easier to use format.

    `items` is a list of single-key dicts ({<renderer name>: <content>});
    None and empty dicts are tolerated and skipped. `channel_id` and `author`
    are used wherever an item does not carry its own.

    Returns a tuple ``(result, extra)``: `result` holds dicts of the form
    {'type': 'VIDEO'|'PLAYLIST', 'content': {...}}; `extra` only ever
    collects what nested calls return. Container renderers are flattened
    recursively; unknown renderers are passed to log_unknown_card().
    """
    result = []
    extra = []
    for item in items or []:  # a failed upstream lookup may hand us None
        key = next(iter(item), None)
        if key is None:  # empty card: nothing to parse
            continue
        content = item[key]
        if key in ("gridVideoRenderer", "videoRenderer", "videoCardRenderer", 'reelItemRenderer'):  # reel==youtube-shorts
            # only videoCardRenderer (topic channels) has author and channel, others fall back to supplied ones.
            result.append({'type': 'VIDEO', 'content': {
                'video_id': content['videoId'],
                'title': content|G('title')|G.text or content|G('headline')|G.text,
                'author': content|G('bylineText')|G.text or author,
                'channel_id': (content|G('bylineText')|G('runs')
                    |Select('navigationEndpoint')
                    |G('browseEndpoint')|G('browseId') or channel_id),
                'length': (content|G('lengthText')|G.text or  # topic channel
                    content|G('thumbnailOverlays')
                    |Select('thumbnailOverlayTimeStatusRenderer')
                    |G('text')|G.text),
                # topic channel: .metadataText.simpleText = "22M views \u00b7 2 months ago"
                'views': content|G('viewCountText')|G.text|A.int,
                'published': content|G('publishedTimeText')|G.text|A(age),
            }})
        elif key in ("gridPlaylistRenderer", "playlistRenderer", "gridRadioRenderer"):
            result.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': content|G('navigationEndpoint')|G('watchEndpoint')|G('playlistId'),
                'video_id': content|G('navigationEndpoint')|G('watchEndpoint')|G('videoId'),
                'title': content|G('title')|G.text,
                'author': author,  # Note: gridRadioRenderer is by 'Youtube' without channel_id, ignoring that.
                'channel_id': channel_id,
                'n_videos': (content|G('videoCount')|A.int or  # playlistRenderer
                    content|G('videoCountShortText','videoCountText')|G.text|A.int),  # grid
            }})
        elif key == "showRenderer":
            watch = content['navigationEndpoint']['watchEndpoint']
            result.append({'type': 'PLAYLIST', 'content': {
                'playlist_id': watch['playlistId'],
                'video_id': watch['videoId'],
                'title': content['title']['simpleText'],
                'author': author,
                'channel_id': channel_id,
                'n_videos': None,
            }})
        elif key == "gridShowRenderer":
            browse_id = content|G('navigationEndpoint')|G('browseEndpoint')|G('browseId')
            result.append({'type': 'PLAYLIST', 'content': {
                # playlistId is prefixed with 'VL', which must be removed.
                # A missing browseId yields None instead of failing on the slice.
                'playlist_id': browse_id[2:] if browse_id else None,
                'video_id': None,
                'title': content|G('title')|G.text,
                'author': author,
                'channel_id': channel_id,
                'n_videos': content|G('thumbnailOverlays')|G(0)
                    |G('thumbnailOverlayBottomPanelRenderer')|G('text')|G.text,
            }})
        elif key in _CHANNEL_CONTAINER_KEYS:
            children = content[_CHANNEL_CONTAINER_KEYS[key]]
            r, e = parse_channel_items(children, channel_id, author)
            result.extend(r)
            extra.extend(e)
        elif key in ("shelfRenderer", "richItemRenderer"):
            # these wrap exactly one child item
            r, e = parse_channel_items([content['content']], channel_id, author)
            result.extend(r)
            extra.extend(e)
        elif key == "reelShelfRenderer":
            r, e = parse_channel_items(content['items'], channel_id, author)
            result.extend(r)
            extra.extend(e)
        elif key in _IGNORED_CHANNEL_RENDERERS:
            pass
        else:
            log_unknown_card(item)

    return result, extra
303
def parse_playlist(item):
    """Parse one entry of a playlist's video list.

    `item` is a single-key dict ({<renderer name>: <content>}).

    Returns {'type': 'VIDEO', 'content': {...}} for a playable
    'playlistVideoRenderer', or None if the entry is not marked playable
    (private or deleted video).

    Raises Exception(item) for any other renderer, and KeyError if the item
    is empty or a playable entry lacks one of the mandatory fields.
    """
    key = next(iter(item.keys()), None)
    content = item[key]
    if key != "playlistVideoRenderer":
        raise Exception(item)  # XXX TODO

    if not content.get('isPlayable', False):
        return None  # private or deleted video

    title = content['title']
    watch = content['navigationEndpoint']['watchEndpoint']
    # `or [{}]` (rather than a .get() default) also covers a 'runs' list that
    # is present but empty, which used to raise IndexError.
    title_run = (title.get('runs') or [{}])[0]
    byline_run = (content.get('shortBylineText', {}).get('runs') or [{}])[0]

    return {'type': 'VIDEO', 'content': {
        'video_id': content['videoId'],
        'title': (title.get('simpleText') or  # playable videos
            title_run.get('text')),  # "[Private video]"
        'playlist_id': watch['playlistId'],
        # or int(content['index']['simpleText']) (absent on course intros; e.g. PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5)
        'index': watch.get('index', 0),
        # rest is missing from unplayable videos:
        'author': byline_run.get('text'),
        'channel_id': byline_run.get('navigationEndpoint', {}).get('browseEndpoint', {}).get('browseId'),
        'length': (content.get("lengthText", {}).get("simpleText") or  # "8:51"
            int(content.get("lengthSeconds", 0))),  # "531"
        'starttime': watch.get('startTimeSeconds'),
    }}
Imprint / Impressum