# Source: git.gir.st - subscriptionfeed.git, app/browse/innertube.py
# Functions that deal with parsing data from YouTube's internal API ("innertube").
3 from .. common
. common
import mkthumbs
, log_unknown_card
, G
class Select:
    """ |Select('foo') returns the first foo in list, |Select(all='foo') returns all foos.

    Meant for the right-hand side of `|`. The left operand is iterated, and
    every element that lists the key in its .keys() contributes element[key].
    """
    def __init__(self, key=None, *, all=None):
        # NOTE(review): the body of __init__ was not visible in the reviewed
        # text. These assignments are inferred from __ror__ reading self.key
        # and self.all and from the class docstring -- confirm against the
        # real file.
        self.key = key if key is not None else all
        self.all = all  # truthy => __ror__ returns every match, not just the first

    def __ror__(self, other):
        """ Implements `other | Select(...)`; `other` is the left operand. """
        try:
            # A distinct loop name: the original reused `other`, which hid
            # the parameter inside the comprehension.
            items = [entry[self.key] for entry in other if self.key in entry.keys()]
        except Exception:
            # e.g. `other` is None or holds non-mapping elements.
            # NOTE(review): the original except clause was not visible; an
            # empty result is assumed, as `items` must be bound below.
            items = []
        # G comes from ..common.common; G(0) presumably picks the first
        # element and tolerates an empty list -- confirm.
        return items if self.all else items|G(0)
class A:
    """ apply: `value | A(func, *extra)` evaluates to func(value, *extra). """
    def __init__(self, f, *args):
        # NOTE(review): the assignments were not visible in the reviewed text;
        # inferred from __ror__ reading self.f and self.args.
        self.f, self.args = f, args

    def __ror__(self, other):
        """ Implements `other | A(...)`; the left operand becomes the first argument. """
        func, extra = self.f, self.args
        return func(other, *extra)
22 def __ror__ ( self
, other
):
23 try : return int ( '' . join ( filter ( str . isdigit
, other
)))
28 def prepare_searchresults ( yt_results
):
29 contents
= ( # from continuation token
31 |
G ( 'onResponseReceivedCommands' )
32 |
Select ( 'appendContinuationItemsAction' )
33 |
G ( 'continuationItems' )
37 |
G ( 'twoColumnSearchResultsRenderer' )
39 |
G ( 'sectionListRenderer' )
42 items
= contents|
Select ( 'itemSectionRenderer' ) |
G ( 'contents' )
43 items
, extra
= parse_result_items ( items
)
44 more
= contents|
Select ( "continuationItemRenderer" ) |
G ( "continuationEndpoint" ) |
G ( "continuationCommand" ) |
G ( "token" )
45 estimatedResults
= yt_results|
G ( "estimatedResults" )
47 return items
, extra
, more
49 def prepare_channel ( response
, channel_id
, channel_name
):
50 meta1
= response|
G ( 'metadata' ) |
G ( 'channelMetadataRenderer' )
51 meta2
= response|
G ( 'microformat' ) |
G ( 'microformatDataRenderer' )
52 title
= meta1|
G ( 'title' ) or meta2|
G ( 'title' ) or channel_name
53 descr
= meta1|
G ( 'description' ) or meta2|
G ( 'description' ) # meta2.description is capped at 160chars
54 thumb
= mkthumbs (( meta2|
G ( 'thumbnail' ) or meta1|
G ( 'avatar' )) |
G ( 'thumbnails' ) or {}) # .avatar ~ 900px
57 response|
G ( 'continuationContents' ) or
58 response|
G ( 'onResponseReceivedActions' )
60 if not contents
: # overran end of list
61 return title
, descr
, thumb
, [], False
63 unparsed
= contents|
G ( 'gridContinuation' ) |
G ( 'items' ) or \
64 contents|
G ( 'sectionListContinuation' ) |
G ( 'contents' ) or \
65 contents|
G ( 'richGridContinuation' ) |
G ( 'contents' ) or \
66 contents|
Select ( 'appendContinuationItemsAction' ) |
G ( 'continuationItems' ) or \
67 contents|
G (- 1 ) |
G ( 'reloadContinuationItemsCommand' ) |
G ( 'continuationItems' ) or []
68 items
, extra
= parse_channel_items ( unparsed
, channel_id
, title
)
70 more
= ( # videos, livestreams
72 |
Select ( 'continuationItemRenderer' )
73 |
G ( 'continuationEndpoint' )
74 |
G ( 'continuationCommand' )
76 ) or ( # playlists, search
78 |
G ( 'gridContinuation' , 'sectionListContinuation' )
80 |
Select ( 'nextContinuationData' )
84 return title
, descr
, thumb
, items
, more
86 def prepare_playlist ( result
):
87 contents
= result
[ 'continuationContents' ]
88 unparsed
= contents
[ 'playlistVideoListContinuation' ]. get ( 'contents' ,[])
91 |
G ( 'playlistVideoListContinuation' )
93 |
Select ( 'nextContinuationData' )
97 meta
= result|
G ( 'sidebar' ) |
G ( 'playlistSidebarRenderer' ) |
G ( 'items' )
98 meta1
= meta|
Select ( 'playlistSidebarPrimaryInfoRenderer' )
99 meta2
= meta|
Select ( 'playlistSidebarSecondaryInfoRenderer' ) \
100 |
G ( 'videoOwner' ) |
G ( 'videoOwnerRenderer' )
101 title
= meta1|
G ( 'title' ) |G
. text
102 author
= meta2|
G ( 'title' ) |G
. text
103 channel_id
= meta2|
G ( 'navigationEndpoint' ) |
G ( 'browseEndpoint' ) |
G ( 'browseId' )
105 return title
, author
, channel_id
, list ( filter ( None , map ( parse_playlist
, unparsed
))), more
108 if s
is None : # missing from autogen'd music, some livestreams
110 # Some livestreams have "Streamed 7 hours ago"
111 s
= s
. replace ( "Streamed " , "" )
112 # Now, everything should be in the form "1 year ago"
114 value
, unit
, _
= s
. split ( " " )
115 except ValueError as e
:
116 # '<n>{y|mo|d|h|min} ago' => https://github.com/TeamNewPipe/NewPipeExtractor/issues/1067
117 value_and_unit
, _
= s
. split ( " " )
118 value
= '' . join ( filter ( str . isdigit
, value_and_unit
))
119 unit
= value_and_unit
. replace ( value
, "" )
124 ). get ( unit
, unit
[ 0 ]) # first letter otherwise (e.g. year(s) => y)
126 return f
" {value}{suffix} "
128 def parse_result_items ( items
):
129 # TODO: use .get() for most non-essential attributes
131 parses youtube search response into an easier to use format.
136 key
= next ( iter ( item
. keys ()), None )
138 if key
in [ 'videoRenderer' , 'reelItemRenderer' , 'gridVideoRenderer' ]:
139 results
. append ({ 'type' : 'VIDEO' , 'content' : {
140 'video_id' : content
[ 'videoId' ],
141 'title' : content|
G ( 'title' ) |G
. text
or content|
G ( 'headline' ) |G
. text
,
142 'author' : content|
G ( 'longBylineText' , 'shortBylineText' ) |G
. text
,
143 'channel_id' : content|
G ( 'ownerText' ) |
G ( 'runs' ) |
G ( 0 ) \
144 |
G ( 'navigationEndpoint' ) |
G ( 'browseEndpoint' ) |
G ( 'browseId' ) \
145 or content|
G ( "channelThumbnailSupportedRenderers" ) | \
146 G ( "channelThumbnailWithLinkRenderer" ) |
G ( "navigationEndpoint" ) | \
147 G ( "browseEndpoint" ) |
G ( "browseId" ),
148 'length' : content|
G ( 'lengthText' ) |G
. text
, # "44:07", "1:41:50"
149 'views' : content|
G ( 'viewCountText' ) |G
. text|A
. int or 0 , # "1,234 {views|watching}", absent on 0 views
150 'published' : content|
G ( 'publishedTimeText' ) |
G ( 'simpleText' ) |
A ( age
),
151 'live' : content|
G ( 'badges' ) |
Select ( 'metadataBadgeRenderer' ) |
G ( 'style' )== 'BADGE_STYLE_TYPE_LIVE_NOW' ,
153 elif key
in [ 'playlistRenderer' , 'radioRenderer' , 'showRenderer' ]: # radio == "Mix" playlist, show == normal playlist, specially displayed
154 results
. append ({ 'type' : 'PLAYLIST' , 'content' : {
155 'playlist_id' : content
[ 'navigationEndpoint' ] |
G ( 'watchEndpoint' ) |
G ( 'playlistId' ),
156 'video_id' : content
[ 'navigationEndpoint' ] |
G ( 'watchEndpoint' ) |
G ( 'videoId' ),
157 'title' : content
[ 'title' ] |G
. text
,
158 'author' : content|
G ( 'longBylineText' , 'shortBylineText' ) |G
. text
,
159 'channel_id' : content|
G ( 'longBylineText' , 'shortBylineText' ) |
G ( 'runs' ) |
G ( 0 ) \
160 |
G ( 'navigationEndpoint' ) |
G ( 'browseEndpoint' ) |
G ( 'browseId' ),
161 'n_videos' : content|
G ( 'videoCount' ) |A
. int or \
162 content|
G ( 'videoCountShortText' , 'videoCountText' ) |G
. text
, # "Mix" playlists
164 elif key
== 'channelRenderer' :
165 results
. append ({ 'type' : 'CHANNEL' , 'content' : {
166 'channel_id' : content
[ 'channelId' ],
167 'title' : content
[ 'title' ] |G
. text
,
168 'icons' : content
[ 'thumbnail' ][ 'thumbnails' ] |
A ( mkthumbs
),
169 'subscribers' : content|
G ( 'subscriberCountText' ) |
G ( 'simpleText' ), # "2.47K subscribers"
171 elif key
== 'shelfRenderer' :
172 subkey
= next ( iter ( content
[ 'content' ]. keys ()), None ) #verticalListRenderer/horizontalMovieListRenderer
173 r
, e
= parse_result_items ( content
[ 'content' ][ subkey
][ 'items' ])
176 elif key
in [ "reelShelfRenderer" ]:
177 r
, e
= parse_result_items ( content
[ 'items' ])
180 elif key
in [ 'movieRenderer' , 'gridMovieRenderer' ]: # movies to buy/rent
181 pass # gMR.{videoId,title.runs[].text,lengthText.simpleText}
182 elif key
in [ 'carouselAdRenderer' , 'searchPyvRenderer' , 'promotedSparklesTextSearchRenderer' ,
183 'promotedSparklesWebRenderer' , 'compactPromotedItemRenderer' , 'adSlotRenderer' ]: # haha, no.
185 elif key
== 'horizontalCardListRenderer' :
186 # suggested searches: .cards[].searchRefinementCardRenderer.query.runs[].text
188 elif key
== 'emergencyOneboxRenderer' : # suicide prevention hotline
190 elif key
in [ 'clarificationRenderer' , 'infoPanelContainerRenderer' ]: # COVID-19/conspiracy theory infos
192 elif key
== 'webAnswerRenderer' : # "Result from the web"
194 elif key
== 'infoPanelContentRenderer' : # "These results may be new or changing quickly"
196 elif key
== 'hashtagTileRenderer' : # link to '/hashtag/<search_query>'
198 elif key
in [ 'didYouMeanRenderer' , 'showingResultsForRenderer' , 'includingResultsForRenderer' ]:
201 'query' : content
[ 'correctedQueryEndpoint' ][ 'searchEndpoint' ][ 'query' ], # non-misspelled query
202 'autocorrected' : key
in [ 'showingResultsForRenderer' , 'includingResultsForRenderer' ],
204 elif key
== 'messageRenderer' : # "No more results"
207 'message' : content|
G ( 'title' , 'text' ) |G
. text
,
209 elif key
== 'backgroundPromoRenderer' : # e.g. "no results"
211 'type' : content
[ 'icon' ][ 'iconType' ],
212 'message' : content
[ 'title' ] |G
. text
,
215 log_unknown_card ( item
)
216 return results
, extras
218 def parse_channel_items ( items
, channel_id
, author
):
222 key
= next ( iter ( item
. keys ()), None )
224 if key
in [ "gridVideoRenderer" , "videoRenderer" , "videoCardRenderer" , 'reelItemRenderer' ]: # reel==youtube-shorts
225 # only videoCardRenderer (topic channels) has author and channel, others fall back to supplied ones.
226 result
. append ({ 'type' : 'VIDEO' , 'content' : {
227 'video_id' : content
[ 'videoId' ],
228 'title' : content|
G ( 'title' ) |G
. text
or content|
G ( 'headline' ) |G
. text
,
229 'author' : content|
G ( 'bylineText' ) |G
. text
or author
,
230 'channel_id' : ( content|
G ( 'bylineText' ) |
G ( 'runs' )
231 |
Select ( 'navigationEndpoint' )
232 |
G ( 'browseEndpoint' ) |
G ( 'browseId' ) or channel_id
),
233 'length' : ( content|
G ( 'lengthText' ) |G
. text
or # topic channel
234 content|
G ( 'thumbnailOverlays' )
235 |
Select ( 'thumbnailOverlayTimeStatusRenderer' )
237 # topic channel: .metadataText.simpleText = "22M views \u00b7 2 months ago"
238 'views' : content|
G ( 'viewCountText' ) |G
. text|A
. int ,
239 'published' : content|
G ( 'publishedTimeText' ) |G
. text|
A ( age
),
241 elif key
in [ "gridPlaylistRenderer" , "playlistRenderer" , "gridRadioRenderer" ]:
242 result
. append ({ 'type' : 'PLAYLIST' , 'content' : {
243 'playlist_id' : content|
G ( 'navigationEndpoint' ) |
G ( 'watchEndpoint' ) |
G ( 'playlistId' ),
244 'video_id' : content|
G ( 'navigationEndpoint' ) |
G ( 'watchEndpoint' ) |
G ( 'videoId' ),
245 'title' : content|
G ( 'title' ) |G
. text
,
246 'author' : author
, # Note: gridRadioRenderer is by 'Youtube' without channel_id, ignoring that.
247 'channel_id' : channel_id
,
248 'n_videos' : ( content|
G ( 'videoCount' ) |A
. int or # playlistRenderer
249 content|
G ( 'videoCountShortText' , 'videoCountText' ) |G
. text|A
. int ) # grid
251 elif key
== "showRenderer" :
252 result
. append ({ 'type' : 'PLAYLIST' , 'content' : {
253 'playlist_id' : content
[ 'navigationEndpoint' ][ 'watchEndpoint' ][ 'playlistId' ],
254 'video_id' : content
[ 'navigationEndpoint' ][ 'watchEndpoint' ][ 'videoId' ],
255 'title' : content
[ 'title' ][ 'simpleText' ],
257 'channel_id' : channel_id
,
260 elif key
in [ "gridShowRenderer" ]:
261 result
. append ({ 'type' : 'PLAYLIST' , 'content' : {
262 'playlist_id' : ( content|
G ( 'navigationEndpoint' )
263 |
G ( 'browseEndpoint' ) |
G ( 'browseId' ))[ 2 :],
264 #^: playlistId prefixed with 'VL', which must be removed
266 'title' : content|
G ( 'title' ) |G
. text
,
268 'channel_id' : channel_id
,
269 'n_videos' : content|
G ( 'thumbnailOverlays' ) |
G ( 0 )
270 |
G ( 'thumbnailOverlayBottomPanelRenderer' ) |
G ( 'text' ) |G
. text
,
272 elif key
in [ "itemSectionRenderer" , "gridRenderer" , "horizontalCardListRenderer" , "horizontalListRenderer" ]:
274 "itemSectionRenderer" : 'contents' ,
275 "gridRenderer" : 'items' ,
276 "horizontalCardListRenderer" : 'cards' ,
277 "horizontalListRenderer" : 'items' ,
279 r
, e
= parse_channel_items ( content
[ newkey
], channel_id
, author
)
282 elif key
in [ "shelfRenderer" , "richItemRenderer" ]:
283 r
, e
= parse_channel_items ([ content
[ 'content' ]], channel_id
, author
)
286 elif key
in [ "reelShelfRenderer" ]:
287 r
, e
= parse_channel_items ( content
[ 'items' ], channel_id
, author
)
290 elif key
== "messageRenderer" :
291 # e.g. {'messageRenderer': {'text': {'runs': [{'text': 'This channel has no playlists.'}]}}}
293 elif key
== "gameCardRenderer" :
295 elif key
== "gridChannelRenderer" :
296 pass # don't care; related channels, e.g. on UCMsgXPD3wzzt8RxHJmXH7hQ
297 elif key
== 'continuationItemRenderer' : # handled in parent function
300 log_unknown_card ( item
)
def parse_playlist(item):
    """
    converts one entry of a playlist's video list into the common
    {'type': 'VIDEO', 'content': {...}} format.

    item: dict whose first key names the renderer; only
        "playlistVideoRenderer" is understood.
    returns: the converted dict, or None when the entry is not flagged
        'isPlayable' (private or deleted video).
    raises: Exception(item) for any other (or missing) renderer key;
        KeyError when 'videoId', 'title' or
        navigationEndpoint.watchEndpoint.playlistId is absent from a
        playable entry.
    """
    key = next(iter(item), None)
    if key != "playlistVideoRenderer":
        raise Exception(item) # XXX TODO

    content = item[key]
    if not content.get('isPlayable', False):
        return None # private or deleted video

    watch = content['navigationEndpoint']['watchEndpoint']
    title = content['title']
    # `or [{}]` also covers a present-but-empty 'runs' list, which would
    # otherwise raise IndexError on [0].
    title_run = (title.get('runs') or [{}])[0]
    byline_run = (content.get('shortBylineText', {}).get('runs') or [{}])[0]

    return {'type': 'VIDEO', 'content': {
        'video_id': content['videoId'],
        'title': title.get('simpleText') or title_run.get('text'),
        'playlist_id': watch['playlistId'],
        'index': watch.get('index', 0), #or int(content['index']['simpleText']) (absent on course intros; e.g. PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5)
        'author': byline_run.get('text'),
        'channel_id': byline_run.get('navigationEndpoint', {}).get('browseEndpoint', {}).get('browseId'),
        'length': (content.get("lengthText", {}).get("simpleText") or # "8:51"
                   int(content.get("lengthSeconds", 0))), # "531"
        'starttime': watch.get('startTimeSeconds'),
    }}