# git.gir.st - subscriptionfeed.git/blob - app/browse/innertube.py
# functions that deal with parsing data from YouTube's internal API ("innertube")
3 from .. common
. common
import mkthumbs
, log_unknown_card
, G
6 """ |Select('foo') returns the first foo in list, |Select(all='foo') returns all foos. """
7 def __init__ ( self
, key
= None , *, all
= None ):
10 def __ror__ ( self
, other
):
11 try : items
= [ other
[ self
. key
] for other
in other
if self
. key
in other
. keys () ]
13 return items
if self
. all
else items|
G ( 0 )
16 def __init__ ( self
, f
, * args
):
def __ror__(self, other):
    """ `other | A(f, *args)` evaluates to `f(other, *args)`. """
    func, extra_args = self.f, self.args
    # `other` is the left-hand operand of `|`; it becomes the first argument.
    result = func(other, *extra_args)
    return result
22 def __ror__ ( self
, other
):
23 try : return int ( '' . join ( filter ( str . isdigit
, other
)))
28 def prepare_searchresults ( yt_results
):
29 contents
= ( # from continuation token
31 |
G ( 'onResponseReceivedCommands' )
32 |
Select ( 'appendContinuationItemsAction' )
33 |
G ( 'continuationItems' )
37 |
G ( 'twoColumnSearchResultsRenderer' )
39 |
G ( 'sectionListRenderer' )
42 items
= contents|
Select ( 'itemSectionRenderer' ) |
G ( 'contents' )
43 items
, extra
= parse_result_items ( items
)
44 more
= contents|
Select ( "continuationItemRenderer" ) |
G ( "continuationEndpoint" ) |
G ( "continuationCommand" ) |
G ( "token" )
45 estimatedResults
= yt_results|
G ( "estimatedResults" )
47 return items
, extra
, more
49 def prepare_channel ( response
, channel_id
, channel_name
):
50 meta1
= response|
G ( 'metadata' ) |
G ( 'channelMetadataRenderer' )
51 meta2
= response|
G ( 'microformat' ) |
G ( 'microformatDataRenderer' )
52 title
= meta1|
G ( 'title' ) or meta2|
G ( 'title' ) or channel_name
53 descr
= meta1|
G ( 'description' ) or meta2|
G ( 'description' ) # meta2.description is capped at 160chars
54 thumb
= mkthumbs (( meta2|
G ( 'thumbnail' ) or meta1|
G ( 'avatar' )) |
G ( 'thumbnails' ) or {}) # .avatar ~ 900px
57 response|
G ( 'continuationContents' ) or
58 response|
G ( 'onResponseReceivedActions' )
60 if not contents
: # overran end of list
61 return title
, descr
, thumb
, [], False
63 unparsed
= contents|
G ( 'gridContinuation' ) |
G ( 'items' ) or \
64 contents|
G ( 'sectionListContinuation' ) |
G ( 'contents' ) or \
65 contents|
G ( 'richGridContinuation' ) |
G ( 'contents' ) or \
66 contents|
Select ( 'appendContinuationItemsAction' ) |
G ( 'continuationItems' ) or \
67 contents|
G (- 1 ) |
G ( 'reloadContinuationItemsCommand' ) |
G ( 'continuationItems' ) or []
68 items
, extra
= parse_channel_items ( unparsed
, channel_id
, title
)
70 more
= ( # videos, livestreams
72 |
Select ( 'continuationItemRenderer' )
73 |
G ( 'continuationEndpoint' )
74 |
G ( 'continuationCommand' )
76 ) or ( # playlists, search
78 |
G ( 'gridContinuation' , 'sectionListContinuation' )
80 |
Select ( 'nextContinuationData' )
84 return title
, descr
, thumb
, items
, more
86 def prepare_playlist ( result
):
87 contents
= result
[ 'continuationContents' ]
88 unparsed
= contents
[ 'playlistVideoListContinuation' ]. get ( 'contents' ,[])
91 |
G ( 'playlistVideoListContinuation' )
93 |
Select ( 'nextContinuationData' )
97 meta
= result|
G ( 'sidebar' ) |
G ( 'playlistSidebarRenderer' ) |
G ( 'items' )
98 meta1
= meta|
Select ( 'playlistSidebarPrimaryInfoRenderer' )
99 meta2
= meta|
Select ( 'playlistSidebarSecondaryInfoRenderer' ) \
100 |
G ( 'videoOwner' ) |
G ( 'videoOwnerRenderer' )
101 title
= meta1|
G ( 'title' ) |G
. text
102 author
= meta2|
G ( 'title' ) |G
. text
103 channel_id
= meta2|
G ( 'navigationEndpoint' ) |
G ( 'browseEndpoint' ) |
G ( 'browseId' )
105 return title
, author
, channel_id
, list ( filter ( None , map ( parse_playlist
, unparsed
))), more
108 if s
is None : # missing from autogen'd music, some livestreams
110 # Some livestreams have "Streamed 7 hours ago"
111 s
= s
. replace ( "Streamed " , "" )
112 # Now, everything should be in the form "1 year ago"
113 value
, unit
, _
= s
. split ( " " )
117 ). get ( unit
, unit
[ 0 ]) # first letter otherwise (e.g. year(s) => y)
119 return f
" {value}{suffix} "
121 def parse_result_items ( items
):
122 # TODO: use .get() for most non-essential attributes
124 parses youtube search response into an easier to use format.
129 key
= next ( iter ( item
. keys ()), None )
131 if key
in [ 'videoRenderer' , 'reelItemRenderer' , 'gridVideoRenderer' ]:
132 results
. append ({ 'type' : 'VIDEO' , 'content' : {
133 'video_id' : content
[ 'videoId' ],
134 'title' : content|
G ( 'title' ) |G
. text
or content|
G ( 'headline' ) |G
. text
,
135 'author' : content|
G ( 'longBylineText' , 'shortBylineText' ) |G
. text
,
136 'channel_id' : content|
G ( 'ownerText' ) |
G ( 'runs' ) |
G ( 0 ) \
137 |
G ( 'navigationEndpoint' ) |
G ( 'browseEndpoint' ) |
G ( 'browseId' ) \
138 or content|
G ( "channelThumbnailSupportedRenderers" ) | \
139 G ( "channelThumbnailWithLinkRenderer" ) |
G ( "navigationEndpoint" ) | \
140 G ( "browseEndpoint" ) |
G ( "browseId" ),
141 'length' : content|
G ( 'lengthText' ) |G
. text
, # "44:07", "1:41:50"
142 'views' : content|
G ( 'viewCountText' ) |G
. text|A
. int or 0 , # "1,234 {views|watching}", absent on 0 views
143 'published' : content|
G ( 'publishedTimeText' ) |
G ( 'simpleText' ) |
A ( age
),
144 'live' : content|
G ( 'badges' ) |
Select ( 'metadataBadgeRenderer' ) |
G ( 'style' )== 'BADGE_STYLE_TYPE_LIVE_NOW' ,
146 elif key
in [ 'playlistRenderer' , 'radioRenderer' , 'showRenderer' ]: # radio == "Mix" playlist, show == normal playlist, specially displayed
147 results
. append ({ 'type' : 'PLAYLIST' , 'content' : {
148 'playlist_id' : content
[ 'navigationEndpoint' ] |
G ( 'watchEndpoint' ) |
G ( 'playlistId' ),
149 'video_id' : content
[ 'navigationEndpoint' ] |
G ( 'watchEndpoint' ) |
G ( 'videoId' ),
150 'title' : content
[ 'title' ] |G
. text
,
151 'author' : content|
G ( 'longBylineText' , 'shortBylineText' ) |G
. text
,
152 'channel_id' : content|
G ( 'longBylineText' , 'shortBylineText' ) |
G ( 'runs' ) |
G ( 0 ) \
153 |
G ( 'navigationEndpoint' ) |
G ( 'browseEndpoint' ) |
G ( 'browseId' ),
154 'n_videos' : content|
G ( 'videoCount' ) |A
. int or \
155 content|
G ( 'videoCountShortText' , 'videoCountText' ) |G
. text
, # "Mix" playlists
157 elif key
== 'channelRenderer' :
158 results
. append ({ 'type' : 'CHANNEL' , 'content' : {
159 'channel_id' : content
[ 'channelId' ],
160 'title' : content
[ 'title' ] |G
. text
,
161 'icons' : content
[ 'thumbnail' ][ 'thumbnails' ] |
A ( mkthumbs
),
162 'subscribers' : content|
G ( 'subscriberCountText' ) |
G ( 'simpleText' ), # "2.47K subscribers"
164 elif key
== 'shelfRenderer' :
165 subkey
= next ( iter ( content
[ 'content' ]. keys ()), None ) #verticalListRenderer/horizontalMovieListRenderer
166 r
, e
= parse_result_items ( content
[ 'content' ][ subkey
][ 'items' ])
169 elif key
in [ "reelShelfRenderer" ]:
170 r
, e
= parse_result_items ( content
[ 'items' ])
173 elif key
in [ 'movieRenderer' , 'gridMovieRenderer' ]: # movies to buy/rent
174 pass # gMR.{videoId,title.runs[].text,lengthText.simpleText}
175 elif key
in [ 'carouselAdRenderer' , 'searchPyvRenderer' , 'promotedSparklesTextSearchRenderer' ,
176 'promotedSparklesWebRenderer' , 'compactPromotedItemRenderer' , 'adSlotRenderer' ]: # haha, no.
178 elif key
== 'horizontalCardListRenderer' :
179 # suggested searches: .cards[].searchRefinementCardRenderer.query.runs[].text
181 elif key
== 'emergencyOneboxRenderer' : # suicide prevention hotline
183 elif key
in [ 'clarificationRenderer' , 'infoPanelContainerRenderer' ]: # COVID-19/conspiracy theory infos
185 elif key
== 'webAnswerRenderer' : # "Result from the web"
187 elif key
== 'infoPanelContentRenderer' : # "These results may be new or changing quickly"
189 elif key
== 'hashtagTileRenderer' : # link to '/hashtag/<search_query>'
191 elif key
in [ 'didYouMeanRenderer' , 'showingResultsForRenderer' , 'includingResultsForRenderer' ]:
194 'query' : content
[ 'correctedQueryEndpoint' ][ 'searchEndpoint' ][ 'query' ], # non-misspelled query
195 'autocorrected' : key
in [ 'showingResultsForRenderer' , 'includingResultsForRenderer' ],
197 elif key
== 'messageRenderer' : # "No more results"
200 'message' : content|
G ( 'title' , 'text' ) |G
. text
,
202 elif key
== 'backgroundPromoRenderer' : # e.g. "no results"
204 'type' : content
[ 'icon' ][ 'iconType' ],
205 'message' : content
[ 'title' ] |G
. text
,
208 log_unknown_card ( item
)
209 return results
, extras
211 def parse_channel_items ( items
, channel_id
, author
):
215 key
= next ( iter ( item
. keys ()), None )
217 if key
in [ "gridVideoRenderer" , "videoRenderer" , "videoCardRenderer" , 'reelItemRenderer' ]: # reel==youtube-shorts
218 # only videoCardRenderer (topic channels) has author and channel, others fall back to supplied ones.
219 result
. append ({ 'type' : 'VIDEO' , 'content' : {
220 'video_id' : content
[ 'videoId' ],
221 'title' : content|
G ( 'title' ) |G
. text
or content|
G ( 'headline' ) |G
. text
,
222 'author' : content|
G ( 'bylineText' ) |G
. text
or author
,
223 'channel_id' : ( content|
G ( 'bylineText' ) |
G ( 'runs' )
224 |
Select ( 'navigationEndpoint' )
225 |
G ( 'browseEndpoint' ) |
G ( 'browseId' ) or channel_id
),
226 'length' : ( content|
G ( 'lengthText' ) |G
. text
or # topic channel
227 content|
G ( 'thumbnailOverlays' )
228 |
Select ( 'thumbnailOverlayTimeStatusRenderer' )
230 # topic channel: .metadataText.simpleText = "22M views \u00b7 2 months ago"
231 'views' : content|
G ( 'viewCountText' ) |G
. text|A
. int ,
232 'published' : content|
G ( 'publishedTimeText' ) |G
. text|
A ( age
),
234 elif key
in [ "gridPlaylistRenderer" , "playlistRenderer" , "gridRadioRenderer" ]:
235 result
. append ({ 'type' : 'PLAYLIST' , 'content' : {
236 'playlist_id' : content|
G ( 'navigationEndpoint' ) |
G ( 'watchEndpoint' ) |
G ( 'playlistId' ),
237 'video_id' : content|
G ( 'navigationEndpoint' ) |
G ( 'watchEndpoint' ) |
G ( 'videoId' ),
238 'title' : content|
G ( 'title' ) |G
. text
,
239 'author' : author
, # Note: gridRadioRenderer is by 'Youtube' without channel_id, ignoring that.
240 'channel_id' : channel_id
,
241 'n_videos' : ( content|
G ( 'videoCount' ) |A
. int or # playlistRenderer
242 content|
G ( 'videoCountShortText' , 'videoCountText' ) |G
. text|A
. int ) # grid
244 elif key
== "showRenderer" :
245 result
. append ({ 'type' : 'PLAYLIST' , 'content' : {
246 'playlist_id' : content
[ 'navigationEndpoint' ][ 'watchEndpoint' ][ 'playlistId' ],
247 'video_id' : content
[ 'navigationEndpoint' ][ 'watchEndpoint' ][ 'videoId' ],
248 'title' : content
[ 'title' ][ 'simpleText' ],
250 'channel_id' : channel_id
,
253 elif key
in [ "gridShowRenderer" ]:
254 result
. append ({ 'type' : 'PLAYLIST' , 'content' : {
255 'playlist_id' : ( content|
G ( 'navigationEndpoint' )
256 |
G ( 'browseEndpoint' ) |
G ( 'browseId' ))[ 2 :],
257 #^: playlistId prefixed with 'VL', which must be removed
259 'title' : content|
G ( 'title' ) |G
. text
,
261 'channel_id' : channel_id
,
262 'n_videos' : content|
G ( 'thumbnailOverlays' ) |
G ( 0 )
263 |
G ( 'thumbnailOverlayBottomPanelRenderer' ) |
G ( 'text' ) |G
. text
,
265 elif key
in [ "itemSectionRenderer" , "gridRenderer" , "horizontalCardListRenderer" , "horizontalListRenderer" ]:
267 "itemSectionRenderer" : 'contents' ,
268 "gridRenderer" : 'items' ,
269 "horizontalCardListRenderer" : 'cards' ,
270 "horizontalListRenderer" : 'items' ,
272 r
, e
= parse_channel_items ( content
[ newkey
], channel_id
, author
)
275 elif key
in [ "shelfRenderer" , "richItemRenderer" ]:
276 r
, e
= parse_channel_items ([ content
[ 'content' ]], channel_id
, author
)
279 elif key
in [ "reelShelfRenderer" ]:
280 r
, e
= parse_channel_items ( content
[ 'items' ], channel_id
, author
)
283 elif key
== "messageRenderer" :
284 # e.g. {'messageRenderer': {'text': {'runs': [{'text': 'This channel has no playlists.'}]}}}
286 elif key
== "gameCardRenderer" :
288 elif key
== "gridChannelRenderer" :
289 pass # don't care; related channels, e.g. on UCMsgXPD3wzzt8RxHJmXH7hQ
290 elif key
== 'continuationItemRenderer' : # handled in parent function
293 log_unknown_card ( item
)
def parse_playlist(item):
    """
    Convert one entry of a playlist's video list into a 'VIDEO' result dict.

    item is a single-key dict whose key names the renderer type. Only
    "playlistVideoRenderer" is understood.

    Returns None for entries that are not playable (private or deleted
    videos), otherwise {'type': 'VIDEO', 'content': {...}} with the keys
    video_id, title, playlist_id, index, author, channel_id, length and
    starttime. Optional attributes that are absent come back as None
    (index falls back to 0, length to int(lengthSeconds) or 0).

    Raises Exception(item) for any other (or missing) renderer type, and
    KeyError if a playable entry lacks videoId, title or
    navigationEndpoint.watchEndpoint.playlistId.
    """
    def first_run(text_obj):
        # A text object may carry 'runs': [] (or no 'runs' at all); indexing
        # [0] directly would raise IndexError on the empty list, so fall back
        # to an empty run and let the .get() lookups below yield None.
        runs = text_obj.get('runs') or [{}]
        return runs[0]

    key = next(iter(item), None)
    if key != "playlistVideoRenderer":
        raise Exception(item)  # XXX TODO

    content = item[key]
    if not content.get('isPlayable', False):
        return None  # private or deleted video

    title = content['title']
    watch = content['navigationEndpoint']['watchEndpoint']
    # rest is missing from unplayable videos:
    byline = first_run(content.get('shortBylineText', {}))

    return {'type': 'VIDEO', 'content': {
        'video_id': content['videoId'],
        'title': (title.get('simpleText') or  # playable videos
                  first_run(title).get('text')),  # "[Private video]"
        'playlist_id': watch['playlistId'],
        # or int(content['index']['simpleText']) (absent on course intros;
        # e.g. PL96C35uN7xGJu6skU4TBYrIWxggkZBrF5)
        'index': watch.get('index', 0),
        'author': byline.get('text'),
        'channel_id': (byline.get('navigationEndpoint', {})
                       .get('browseEndpoint', {}).get('browseId')),
        'length': (content.get("lengthText", {}).get("simpleText") or  # "8:51"
                   int(content.get("lengthSeconds", 0))),  # "531"
        'starttime': watch.get('startTimeSeconds'),
    }}