# git.gir.st - subscriptionfeed.git/blob - app/browse/innertube.py
1 # functions that deal with parsing data from youtube's internal API ("innertube")
3 from .. common
. common
import mkthumbs
, log_unknown_card
, G
6 """ |Select('foo') returns the first foo in list, |Select(all='foo') returns all foos. """
7 def __init__ ( self
, key
= None , *, all
= None ):
10 def __ror__ ( self
, other
):
11 try : items
= [ other
[ self
. key
] for other
in other
if self
. key
in other
. keys () ]
13 return items
if self
. all
else items|
G ( 0 )
16 def __init__ ( self
, f
, * args
):
def __ror__(self, other):
    """Handle ``other | self`` by calling the stored function on ``other``.

    ``other`` (the left-hand operand of ``|``) is passed as the first
    positional argument to ``self.f``, followed by the extra positional
    arguments kept in ``self.args``. Whatever ``self.f`` returns is
    returned unchanged; any exception it raises propagates to the caller.
    """
    return self.f(other, *self.args)
22 def __ror__ ( self
, other
):
23 try : return int ( '' . join ( filter ( str . isdigit
, other
)))
28 def prepare_searchresults ( yt_results
):
29 contents
= ( # from continuation token
31 |
G ( 'onResponseReceivedCommands' )
32 |
Select ( 'appendContinuationItemsAction' )
33 |
G ( 'continuationItems' )
37 |
G ( 'twoColumnSearchResultsRenderer' )
39 |
G ( 'sectionListRenderer' )
42 items
= contents|
Select ( 'itemSectionRenderer' ) |
G ( 'contents' )
43 items
, extra
= parse_result_items ( items
)
44 more
= contents|
Select ( "continuationItemRenderer" ) |
G ( "continuationEndpoint" ) |
G ( "continuationCommand" ) |
G ( "token" )
45 estimatedResults
= yt_results|
G ( "estimatedResults" )
47 return items
, extra
, more
49 def prepare_channel ( response
, channel_id
, channel_name
):
50 meta1
= response|
G ( 'metadata' ) |
G ( 'channelMetadataRenderer' )
51 meta2
= response|
G ( 'microformat' ) |
G ( 'microformatDataRenderer' )
52 title
= meta1|
G ( 'title' ) or meta2|
G ( 'title' ) or channel_name
53 descr
= meta1|
G ( 'description' ) or meta2|
G ( 'description' ) # meta2.description is capped at 160chars
54 thumb
= mkthumbs (( meta2|
G ( 'thumbnail' ) or meta1|
G ( 'avatar' )) |
G ( 'thumbnails' ) or {}) # .avatar ~ 900px
57 response|
G ( 'continuationContents' ) or
58 response|
G ( 'onResponseReceivedActions' )
60 if not contents
: # overran end of list
61 return title
, descr
, thumb
, [], False
63 unparsed
= contents|
G ( 'gridContinuation' ) |
G ( 'items' ) or \
64 contents|
G ( 'sectionListContinuation' ) |
G ( 'contents' ) or \
65 contents|
G ( 'richGridContinuation' ) |
G ( 'contents' ) or \
66 contents|
Select ( 'appendContinuationItemsAction' ) |
G ( 'continuationItems' ) or \
67 contents|
G (- 1 ) |
G ( 'reloadContinuationItemsCommand' ) |
G ( 'continuationItems' ) or []
68 items
, extra
= parse_channel_items ( unparsed
, channel_id
, title
)
70 more
= ( # videos, livestreams
72 |
Select ( 'continuationItemRenderer' )
73 |
G ( 'continuationEndpoint' )
74 |
G ( 'continuationCommand' )
76 ) or ( # playlists, search
78 |
G ( 'gridContinuation' , 'sectionListContinuation' )
80 |
Select ( 'nextContinuationData' )
84 return title
, descr
, thumb
, items
, more
86 def prepare_playlist ( result
):
87 contents
= result
[ 'continuationContents' ]
88 unparsed
= contents|
G ( 'playlistVideoListContinuation' , 'richGridContinuation' ) |
G ( 'contents' ) or []
89 more
= ( # XXX: unavailable if richGridContinuation
91 |
G ( 'playlistVideoListContinuation' )
93 |
Select ( 'nextContinuationData' )
97 meta
= result|
G ( 'sidebar' ) |
G ( 'playlistSidebarRenderer' ) |
G ( 'items' )
98 meta1
= meta|
Select ( 'playlistSidebarPrimaryInfoRenderer' )
99 meta2
= meta|
Select ( 'playlistSidebarSecondaryInfoRenderer' ) \
100 |
G ( 'videoOwner' ) |
G ( 'videoOwnerRenderer' )
101 title
= meta1|
G ( 'title' ) |G
. text
102 author
= meta2|
G ( 'title' ) |G
. text
103 channel_id
= meta2|
G ( 'navigationEndpoint' ) |
G ( 'browseEndpoint' ) |
G ( 'browseId' )
105 return title
, author
, channel_id
, list ( filter ( None , map ( parse_playlist
, unparsed
))), more
108 if s
is None : # missing from autogen'd music, some livestreams
110 # Some livestreams have "Streamed 7 hours ago"
111 s
= s
. replace ( "Streamed " , "" )
112 # Now, everything should be in the form "1 year ago"
114 value
, unit
, _
= s
. split ( " " )
115 except ValueError as e
:
116 # '<n>{y|mo|d|h|min} ago' => https://github.com/TeamNewPipe/NewPipeExtractor/issues/1067
117 value_and_unit
, _
= s
. split ( " " )
118 value
= '' . join ( filter ( str . isdigit
, value_and_unit
))
119 unit
= value_and_unit
. replace ( value
, "" )
124 ). get ( unit
, unit
[ 0 ]) # first letter otherwise (e.g. year(s) => y)
126 return f
" {value}{suffix} "
128 def parse_result_items ( items
):
129 # TODO: use .get() for most non-essential attributes
131 parses youtube search response into an easier to use format.
136 key
= next ( iter ( item
. keys ()), None )
138 if key
in [ 'videoRenderer' , 'reelItemRenderer' , 'gridVideoRenderer' ]:
139 results
. append ({ 'type' : 'VIDEO' , 'content' : {
140 'video_id' : content
[ 'videoId' ],
141 'title' : content|
G ( 'title' ) |G
. text
or content|
G ( 'headline' ) |G
. text
,
142 'author' : content|
G ( 'longBylineText' , 'shortBylineText' ) |G
. text
,
143 'channel_id' : content|
G ( 'ownerText' ) |
G ( 'runs' ) |
G ( 0 ) \
144 |
G ( 'navigationEndpoint' ) |
G ( 'browseEndpoint' ) |
G ( 'browseId' ) \
145 or content|
G ( "channelThumbnailSupportedRenderers" ) | \
146 G ( "channelThumbnailWithLinkRenderer" ) |
G ( "navigationEndpoint" ) | \
147 G ( "browseEndpoint" ) |
G ( "browseId" ),
148 'length' : content|
G ( 'lengthText' ) |G
. text
, # "44:07", "1:41:50"
149 'views' : content|
G ( 'viewCountText' ) |G
. text|A
. int or 0 , # "1,234 {views|watching}", absent on 0 views
150 'published' : content|
G ( 'publishedTimeText' ) |
G ( 'simpleText' ) |
A ( age
),
151 'live' : content|
G ( 'badges' ) |
Select ( 'metadataBadgeRenderer' ) |
G ( 'style' )== 'BADGE_STYLE_TYPE_LIVE_NOW' ,
152 'shorts' : key
== 'reelItemRenderer' ,
154 elif key
in [ 'playlistRenderer' , 'radioRenderer' , 'showRenderer' ]: # radio == "Mix" playlist, show == normal playlist, specially displayed
155 results
. append ({ 'type' : 'PLAYLIST' , 'content' : {
156 'playlist_id' : content
[ 'navigationEndpoint' ] |
G ( 'watchEndpoint' ) |
G ( 'playlistId' ),
157 'video_id' : content
[ 'navigationEndpoint' ] |
G ( 'watchEndpoint' ) |
G ( 'videoId' ),
158 'title' : content
[ 'title' ] |G
. text
,
159 'author' : content|
G ( 'longBylineText' , 'shortBylineText' ) |G
. text
,
160 'channel_id' : content|
G ( 'longBylineText' , 'shortBylineText' ) |
G ( 'runs' ) |
G ( 0 ) \
161 |
G ( 'navigationEndpoint' ) |
G ( 'browseEndpoint' ) |
G ( 'browseId' ),
162 'n_videos' : content|
G ( 'videoCount' ) |A
. int or \
163 content|
G ( 'videoCountShortText' , 'videoCountText' ) |G
. text
, # "Mix" playlists
165 elif key
== 'channelRenderer' :
166 results
. append ({ 'type' : 'CHANNEL' , 'content' : {
167 'channel_id' : content
[ 'channelId' ],
168 'title' : content
[ 'title' ] |G
. text
,
169 'icons' : content
[ 'thumbnail' ][ 'thumbnails' ] |
A ( mkthumbs
),
170 'subscribers' : content|
G ( 'subscriberCountText' ) |
G ( 'simpleText' ), # "2.47K subscribers"
172 elif key
== 'shelfRenderer' :
173 subkey
= next ( iter ( content
[ 'content' ]. keys ()), None ) #verticalListRenderer/horizontalMovieListRenderer
174 r
, e
= parse_result_items ( content
[ 'content' ][ subkey
][ 'items' ])
177 elif key
in [ "reelShelfRenderer" ]:
178 r
, e
= parse_result_items ( content
[ 'items' ])
181 elif key
in [ 'movieRenderer' , 'gridMovieRenderer' ]: # movies to buy/rent
182 pass # gMR.{videoId,title.runs[].text,lengthText.simpleText}
183 elif key
in [ 'carouselAdRenderer' , 'searchPyvRenderer' , 'promotedSparklesTextSearchRenderer' ,
184 'promotedSparklesWebRenderer' , 'compactPromotedItemRenderer' , 'adSlotRenderer' ]: # haha, no.
186 elif key
== 'horizontalCardListRenderer' :
187 # suggested searches: .cards[].searchRefinementCardRenderer.query.runs[].text
189 elif key
== 'emergencyOneboxRenderer' : # suicide prevention hotline
191 elif key
in [ 'clarificationRenderer' , 'infoPanelContainerRenderer' ]: # COVID-19/conspiracy theory infos
193 elif key
== 'webAnswerRenderer' : # "Result from the web"
195 elif key
== 'infoPanelContentRenderer' : # "These results may be new or changing quickly"
197 elif key
== 'hashtagTileRenderer' : # link to '/hashtag/<search_query>'
199 elif key
in [ 'didYouMeanRenderer' , 'showingResultsForRenderer' , 'includingResultsForRenderer' ]:
202 'query' : content
[ 'correctedQueryEndpoint' ][ 'searchEndpoint' ][ 'query' ], # non-misspelled query
203 'autocorrected' : key
in [ 'showingResultsForRenderer' , 'includingResultsForRenderer' ],
205 elif key
== 'messageRenderer' : # "No more results"
208 'message' : content|
G ( 'title' , 'text' ) |G
. text
,
210 elif key
== 'backgroundPromoRenderer' : # e.g. "no results"
212 'type' : content
[ 'icon' ][ 'iconType' ],
213 'message' : content
[ 'title' ] |G
. text
,
216 log_unknown_card ( item
)
217 return results
, extras
219 def parse_channel_items ( items
, channel_id
, author
):
223 key
= next ( iter ( item
. keys ()), None )
225 if key
in [ "gridVideoRenderer" , "videoRenderer" , "videoCardRenderer" , 'reelItemRenderer' ]:
226 # only videoCardRenderer (topic channels) has author and channel, others fall back to supplied ones.
227 result
. append ({ 'type' : 'VIDEO' , 'content' : {
228 'video_id' : content
[ 'videoId' ],
229 'title' : content|
G ( 'title' ) |G
. text
or content|
G ( 'headline' ) |G
. text
,
230 'author' : content|
G ( 'bylineText' ) |G
. text
or author
,
231 'channel_id' : ( content|
G ( 'bylineText' ) |
G ( 'runs' )
232 |
Select ( 'navigationEndpoint' )
233 |
G ( 'browseEndpoint' ) |
G ( 'browseId' ) or channel_id
),
234 'length' : ( content|
G ( 'lengthText' ) |G
. text
or # topic channel
235 content|
G ( 'thumbnailOverlays' )
236 |
Select ( 'thumbnailOverlayTimeStatusRenderer' )
238 # topic channel: .metadataText.simpleText = "22M views \u00b7 2 months ago"
239 'views' : content|
G ( 'viewCountText' ) |G
. text|A
. int ,
240 'published' : content|
G ( 'publishedTimeText' ) |G
. text|
A ( age
),
241 'shorts' : key
== 'reelItemRenderer' ,
243 elif key
in [ "gridPlaylistRenderer" , "playlistRenderer" , "gridRadioRenderer" ]:
244 result
. append ({ 'type' : 'PLAYLIST' , 'content' : {
245 'playlist_id' : content|
G ( 'navigationEndpoint' ) |
G ( 'watchEndpoint' ) |
G ( 'playlistId' ),
246 'video_id' : content|
G ( 'navigationEndpoint' ) |
G ( 'watchEndpoint' ) |
G ( 'videoId' ),
247 'title' : content|
G ( 'title' ) |G
. text
,
248 'author' : author
, # Note: gridRadioRenderer is by 'Youtube' without channel_id, ignoring that.
249 'channel_id' : channel_id
,
250 'n_videos' : ( content|
G ( 'videoCount' ) |A
. int or # playlistRenderer
251 content|
G ( 'videoCountShortText' , 'videoCountText' ) |G
. text|A
. int ) # grid
253 elif key
== "showRenderer" :
254 result
. append ({ 'type' : 'PLAYLIST' , 'content' : {
255 'playlist_id' : content
[ 'navigationEndpoint' ][ 'watchEndpoint' ][ 'playlistId' ],
256 'video_id' : content
[ 'navigationEndpoint' ][ 'watchEndpoint' ][ 'videoId' ],
257 'title' : content
[ 'title' ][ 'simpleText' ],
259 'channel_id' : channel_id
,
262 elif key
in [ "gridShowRenderer" ]:
263 result
. append ({ 'type' : 'PLAYLIST' , 'content' : {
264 'playlist_id' : ( content|
G ( 'navigationEndpoint' )
265 |
G ( 'browseEndpoint' ) |
G ( 'browseId' ))[ 2 :],
266 #^: playlistId prefixed with 'VL', which must be removed
268 'title' : content|
G ( 'title' ) |G
. text
,
270 'channel_id' : channel_id
,
271 'n_videos' : content|
G ( 'thumbnailOverlays' ) |
G ( 0 )
272 |
G ( 'thumbnailOverlayBottomPanelRenderer' ) |
G ( 'text' ) |G
. text
,
274 elif key
in [ "itemSectionRenderer" , "gridRenderer" , "horizontalCardListRenderer" , "horizontalListRenderer" ]:
276 "itemSectionRenderer" : 'contents' ,
277 "gridRenderer" : 'items' ,
278 "horizontalCardListRenderer" : 'cards' ,
279 "horizontalListRenderer" : 'items' ,
281 r
, e
= parse_channel_items ( content
[ newkey
], channel_id
, author
)
284 elif key
in [ "shelfRenderer" , "richItemRenderer" ]:
285 r
, e
= parse_channel_items ([ content
[ 'content' ]], channel_id
, author
)
288 elif key
in [ "reelShelfRenderer" ]:
289 r
, e
= parse_channel_items ( content
[ 'items' ], channel_id
, author
)
292 elif key
== "messageRenderer" :
293 # e.g. {'messageRenderer': {'text': {'runs': [{'text': 'This channel has no playlists.'}]}}}
295 elif key
== "gameCardRenderer" :
297 elif key
== "gridChannelRenderer" :
298 pass # don't care; related channels, e.g. on UCMsgXPD3wzzt8RxHJmXH7hQ
299 elif key
== 'continuationItemRenderer' : # handled in parent function
302 log_unknown_card ( item
)
306 def parse_playlist ( item
):
307 key
= next ( iter ( item
. keys ()), None )
309 if key
in [ "playlistVideoRenderer" , "reelItemRenderer" ]:
310 if not content
. get ( 'isPlayable' , False ) and key
!= "reelItemRenderer" :
311 return None # private or deleted video
313 return { 'type' : 'VIDEO' , 'content' : {
314 'video_id' : content
[ 'videoId' ],
315 'title' : content|
G ( 'title' , 'headline' ) |G
. text
,
316 'playlist_id' : content
[ 'navigationEndpoint' ] |
G ( 'watchEndpoint' , 'reelWatchEndpoint' ) |
G ( 'playlistId' ),
317 'index' : content
[ 'navigationEndpoint' ]. get ( 'watchEndpoint' ,{}). get ( 'index' , 0 ), #or int(content['index']['simpleText']) (absent on course intros; e.g. PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5 or on shorts; e.g. PLnN2bBxGARv7fRxsCcWaxvGE6sn5Ypp1H)
318 # rest is missing from unplayable videos and from shorts:
319 'author' : content
. get ( 'shortBylineText' ,{}). get ( 'runs' ,[{}])[ 0 ]. get ( 'text' ),
320 'channel_id' : content
. get ( 'shortBylineText' ,{}). get ( 'runs' ,[{}])[ 0 ]. get ( 'navigationEndpoint' ,{}). get ( 'browseEndpoint' ,{}). get ( 'browseId' ),
321 'length' : ( content
. get ( "lengthText" ,{}). get ( "simpleText" ) or # "8:51"
322 int ( content
. get ( "lengthSeconds" , 0 ))), # "531"
323 'starttime' : content
[ 'navigationEndpoint' ]. get ( 'watchEndpoint' ,{}). get ( 'startTimeSeconds' ),
324 'shorts' : key
== "reelItemRenderer"
326 elif key
== "richItemRenderer" :
327 return parse_playlist ( content
[ 'content' ]) # should contain one ytshorts
329 raise Exception ( item
) # XXX TODO