]>
git.gir.st - subscriptionfeed.git/blob - app/frontend.py
8 from urllib
. parse
import parse_qs
9 from flask
import Flask
, render_template
, request
, redirect
, flash
, url_for
, jsonify
, g
14 app
. secret_key
= secrets
. token_bytes ( 16 )
15 # Note: currently expiring after 10 minutes. googlevideo-urls are valid for 5h59m, but this makes reddit very stale and premiere videos won't start.
16 requests_cache
. install_cache ( backend
= 'memory' , expire_after
= 10 * 60 , allowable_codes
=( 200 ,))
18 # Note: this should only be required for the 'memory' backed cache.
19 from threading
import Timer
21 requests_cache
. remove_expired_responses ()
22 t
= Timer ( sec
, purge_cache
, args
=( sec
,))
29 return redirect ( url_for ( 'feed' ), code
= 302 )
31 @app . route ( '/feed/subscriptions' )
33 token
= request
. args
. get ( 'token' , 'guest' )
34 page
= int ( request
. args
. get ( 'page' , 0 ))
35 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
38 SELECT videos.id, channel_id, name, title, published, flags.display
40 JOIN channels ON videos.channel_id = channels.id
41 LEFT JOIN flags ON (videos.id = flags.video_id) AND (flags.user = ?)
43 (SELECT channel_id FROM subscriptions WHERE user = ?)
44 AND flags.display IS NOT 'hidden'
45 ORDER BY (display = 'pinned') DESC, crawled DESC
47 OFFSET 36*?""" , ( token
, token
, page
))
50 'channel_id' : channel_id
,
53 'published' : published
,
54 'pinned' : display
== 'pinned' ,
55 } for ( video_id
, channel_id
, author
, title
, published
, display
) in c
. fetchall ()]
56 return render_template ( 'index.html.j2' , rows
= rows
, page
= page
)
60 if not 'v' in request
. args
:
61 return "missing video id" , 400
63 video_id
= request
. args
. get ( 'v' )
64 ( video_url
, metadata
, error_type
, error
) = get_video_info ( video_id
)
65 if error_type
in [ 'initial' , 'player' ]:
66 return error
, 400 , { 'content-type' : 'text/plain' , "Link" : "<data:text/css,body%7Bcolor:%23eee;background:%23333%7D>; rel=stylesheet;" }
68 show
= request
. args
. get ( "show" )
69 if show
== "metadata" : # todo: handle the case when we have an exhausted error with no metadata returned
70 return render_template ( 'watch.html.j2' , video_id
= video_id
, video_url
= video_url
, ** prepare_metadata ( metadata
))
72 return jsonify ( metadata
)
75 extra
= { 'geolocked' : 'local=1' , 'livestream' : 'raw=0' }. get ( error
, '' )
76 # if error==exhausted, metadata.playabilityStatus.reason may contain additional information.
77 return f
"{error.upper()}: Redirecting to Invidious." , 502 , { 'Refresh' : '2; URL=https://invidio.us/watch?v=' + video_id
+ '&' + extra
+ '&raw=1' , 'content-type' : 'text/plain' , "Link" : "<data:text/css,body%7Bcolor:%23eee;background:%23333%7D>; rel=stylesheet;" }
78 return redirect ( video_url
, code
= 307 )
80 def prepare_metadata ( metadata
):
81 meta1
= metadata
[ 'videoDetails' ]
82 meta2
= metadata
[ 'microformat' ][ 'playerMicroformatRenderer' ]
83 cards
= metadata
[ 'cards' ][ 'cardCollectionRenderer' ][ 'cards' ] if 'cards' in metadata
else []
84 endsc
= metadata
[ 'endscreen' ][ 'endscreenRenderer' ][ 'elements' ] if 'endscreen' in metadata
else []
86 #aspect_ratio = meta2['embed']['width'] / meta2['embed']['height'], # sometimes absent
87 aspect_ratio
= meta2
[ 'thumbnail' ][ 'thumbnails' ][ 0 ][ 'width' ] / meta2
[ 'thumbnail' ][ 'thumbnails' ][ 0 ][ 'height' ]
91 'code' : cc
[ 'languageCode' ],
92 'autogenerated' : cc
. get ( 'kind' )== "asr" ,
93 'name' : cc
[ 'name' ][ 'simpleText' ]}
94 for cc
in metadata
[ 'captions' ][ 'playerCaptionsTracklistRenderer' ][ 'captionTracks' ]
95 ], key
= lambda cc
: cc
[ 'autogenerated' ]) if 'captionTracks' in metadata
[ 'captions' ][ 'playerCaptionsTracklistRenderer' ] else []
97 def parse_infocard ( card
):
98 card
= card
[ 'cardRenderer' ]
99 teaser
= card
[ 'teaser' ][ 'simpleCardTeaserRenderer' ][ 'message' ][ 'simpleText' ] # not used
100 ctype
= list ( card
[ 'content' ]. keys ())[ 0 ]
101 content
= card
[ 'content' ][ ctype
]
102 if ctype
== "pollRenderer" :
105 'question' : content
[ 'question' ][ 'simpleText' ],
106 'answers' : [( a
[ 'text' ][ 'simpleText' ], a
[ 'numVotes' ]) for a
in content
[ 'choices' ]],
108 elif ctype
== "videoInfoCardContentRenderer" :
111 'video_id' : content
[ 'action' ][ 'watchEndpoint' ][ 'videoId' ],
112 'title' : content
[ 'videoTitle' ][ 'simpleText' ],
113 'author' : content
[ 'channelName' ][ 'simpleText' ], # 'by xXxXx'
114 'length' : content
[ 'lengthString' ][ 'simpleText' ], # '23:03'
115 'views' : content
[ 'viewCountText' ][ 'simpleText' ], # '421,248 views'
117 elif ctype
== "playlistInfoCardContentRenderer" :
120 'playlist_id' : content
[ 'action' ][ 'watchEndpoint' ][ 'playlistId' ],
121 'video_id' : content
[ 'action' ][ 'watchEndpoint' ][ 'videoId' ], # XXX: untested
122 'title' : content
[ 'playlistTitle' ][ 'simpleText' ],
123 'author' : content
[ 'channelName' ][ 'simpleText' ],
124 'n_videos' : content
[ 'videoCountText' ][ 'simpleText' ],
126 elif ctype
== "simpleCardContentRenderer" and 'urlEndpoint' in content
. get ( 'command' ,{}). keys ():
129 'url' : parse_qs ( content
[ 'command' ][ 'urlEndpoint' ][ 'url' ]. split ( '?' )[ 1 ])[ 'q' ][ 0 ],
130 'title' : content
[ 'title' ][ 'simpleText' ],
131 'text' : content
[ 'actionButton' ][ 'simpleCardButtonRenderer' ][ 'text' ][ 'simpleText' ],
135 content
= { 'error' : f
" {ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>" }
137 return { 'teaser' : teaser
, 'type' : ctype
, 'content' : content
}
139 def parse_endcard ( card
):
140 card
= card
[ 'endscreenElementRenderer' ] if 'endscreenElementRenderer' in card
. keys () else card
141 ctype
= card
[ 'style' ]
142 if ctype
== "CHANNEL" :
144 'channel_id' : card
[ 'endpoint' ][ 'browseEndpoint' ][ 'browseId' ],
145 'title' : card
[ 'title' ][ 'simpleText' ],
146 'icons' : { e
[ 'height' ]: e
[ 'url' ] for e
in card
[ 'image' ][ 'thumbnails' ]},
148 elif ctype
== "VIDEO" :
150 'video_id' : card
[ 'endpoint' ][ 'watchEndpoint' ][ 'videoId' ],
151 'title' : card
[ 'title' ][ 'simpleText' ],
152 'length' : card
[ 'videoDuration' ][ 'simpleText' ], # '12:21'
153 'views' : card
[ 'metadata' ][ 'simpleText' ], # '51,649 views'
155 elif ctype
== "PLAYLIST" :
157 'playlist_id' : card
[ 'endpoint' ][ 'watchEndpoint' ][ 'playlistId' ],
158 'video_id' : card
[ 'endpoint' ][ 'watchEndpoint' ][ 'videoId' ],
159 'title' : card
[ 'title' ][ 'simpleText' ],
160 'author' : card
[ 'metadata' ][ 'simpleText' ],
161 'n_videos' : card
[ 'playlistLength' ][ 'simpleText' ],
163 elif ctype
== "WEBSITE" :
165 'url' : parse_qs ( card
[ 'endpoint' ][ 'urlEndpoint' ][ 'url' ]. split ( '?' )[ 1 ])[ 'q' ][ 0 ],
166 'title' : card
[ 'title' ][ 'simpleText' ],
167 'icons' : { e
[ 'height' ]: e
[ 'url' ] for e
in card
[ 'image' ][ 'thumbnails' ]},
171 content
= { 'error' : f
" {ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>" }
173 return { 'type' : ctype
, 'content' : content
}
176 'title' : meta1
[ 'title' ],
177 'author' : meta1
[ 'author' ],
178 'channel_id' : meta1
[ 'channelId' ],
179 'description' : meta1
[ 'shortDescription' ],
180 'published' : meta2
[ 'publishDate' ],
181 'views' : meta1
[ 'viewCount' ],
182 'length' : int ( meta1
[ 'lengthSeconds' ]),
183 'rating' : meta1
[ 'averageRating' ],
184 'category' : meta2
[ 'category' ],
185 'aspectr' : aspect_ratio
,
186 'unlisted' : meta2
[ 'isUnlisted' ],
187 'countries' : meta2
[ 'availableCountries' ],
188 'infocards' : [ parse_infocard ( card
) for card
in cards
],
189 'endcards' : [ parse_endcard ( card
) for card
in endsc
],
190 'subtitles' : subtitles
,
193 def get_video_info ( video_id
):
195 returns the best-quality muxed video stream, the player_response, error-type/-message
196 error types: 'initial': the request to get_video_info was malformed
197 'player': playabilityStatus != OK
198 'internal': [livestream, geolocked, exhausted]
200 # TODO: caching, e.g. beaker? need to not cache premiering-soon videos/livestreams/etc, though
201 # responses are apparently valid for 6h; maybe cache for (video_length - 2h)
202 # TODO: error types? ["invalid parameters", playabilitystatus, own]
203 # todo: a bit messy; should return all unscrambled video urls in best->worst quality
205 # we try to fetch the video multiple times using different origins
206 ( sts
, algo
) = get_cipher ()
207 for el
in [ 'embedded' , 'detailpage' ]: # ['el-completely-absent',info,leanback,editpage,adunit,previewpage,profilepage]
208 r
= requests
. get ( f
"https://www.youtube.com/get_video_info" +
209 f
"?video_id= {video_id} " +
210 f
"&eurl=https://youtube.googleapis.com/v/ {video_id} " +
213 f
"&hl=en_US" ) #"&hl=en&gl=US"
214 params
= parse_qs ( r
. text
)
215 if 'errorcode' in params
: # status=fail
216 return None , None , 'initial' , f
"MALFORMED: {params['reason'][0]}"
218 metadata
= json
. loads ( params
. get ( 'player_response' )[ 0 ])
219 if metadata
[ 'playabilityStatus' ][ 'status' ] != "OK" :
220 if metadata
[ 'playabilityStatus' ][ 'status' ] == "UNPLAYABLE" :
221 continue # try again with different 'el' value. if none succeeds, we fall into "exhausted" path, which returns last tried metadata, from which the playabilityStatus.reason can be extracted. according to jwz/youtubedown, the worst error message comes from embedded, which is tried first, so it should be overwritten by a better message.
222 return None , None , 'player' , f
"{metadata['playabilityStatus']['status']}: {metadata['playabilityStatus']['reason']}"
223 if 'liveStreamability' in metadata
[ 'playabilityStatus' ]:
224 return None , metadata
, 'internal' , "livestream" # can also check .microformat.liveBroadcastDetails.isLiveNow
226 formats
= metadata
[ 'streamingData' ][ 'formats' ]
227 for ( i
, v
) in enumerate ( formats
):
228 if not ( 'cipher' in v
or 'signatureCipher' in v
): continue
229 cipher
= parse_qs ( v
. get ( 'cipher' ) or v
. get ( 'signatureCipher' ))
230 formats
[ i
][ 'url' ] = unscramble ( cipher
)
232 # todo: check if we have urls or try again
233 url
= sorted ( formats
, key
= lambda k
: k
[ 'height' ], reverse
= True )[ 0 ][ 'url' ]
235 if 'gcr' in parse_qs ( url
):
236 return None , metadata
, 'internal' , "geolocked"
238 return url
, metadata
, None , None
240 return None , metadata
, 'internal' , "exhausted"
def unscramble(cipher):  # test video id: UxxajLWwzqY
    """Return the stream URL from *cipher* with its signature parameter appended.

    *cipher* is a ``parse_qs``-style mapping (every value is a list of
    strings) built from a format's ``cipher``/``signatureCipher`` string.
    Keys read here:

    * ``url`` -- base stream URL (required).
    * ``sig`` -- ready-to-use signature; when present it is used verbatim
      and no descrambling takes place.
    * ``s``   -- scrambled signature; required when ``sig`` is absent.
    * ``sp``  -- name of the query parameter carrying the signature;
      defaults to ``signature``.

    The scrambled signature is decoded by applying, in order, the
    whitespace-separated operations of the algorithm string returned by
    ``get_cipher()``:

    * ``r``  -- reverse the signature
    * ``sN`` -- drop the first N characters
    * ``wN`` -- swap the first character with the one at index N modulo the
      current signature length

    Raises:
        KeyError: ``url`` is missing, or neither ``sig`` nor ``s`` is present.
        ValueError: the algorithm contains an operation that cannot be parsed
            or an ``s``/``w`` operation without its numeric argument.
    """
    if 'sig' in cipher:
        # A ready-made signature wins; 's' may legitimately be absent here,
        # so do not touch it (and do not load the cipher algorithm at all).
        sig = cipher['sig'][0]
    else:
        signature = list(cipher['s'][0])
        (_sts, algo) = get_cipher()
        for step in algo.split():
            parsed = re.match(r"([rsw])(\d+)?", step)
            if parsed is None:
                raise ValueError(f"unknown cipher operation: {step!r}")
            op, ix = parsed.groups()
            if op == 'r':
                signature.reverse()
                continue
            if ix is None:
                raise ValueError(f"cipher operation {step!r} lacks an index")
            n = int(ix)
            if op == 's':
                signature = signature[n:]
            elif signature:
                # op == 'w'; an emptied signature has nothing to swap, and
                # taking the index modulo zero would raise.
                pos = n % len(signature)
                signature[0], signature[pos] = signature[pos], signature[0]
        sig = ''.join(signature)

    sp = cipher.get('sp', ['signature'])[0]
    return f"{cipher['url'][0]}&{sp}={sig}"
255 @app . route ( '/channel/<channel_id>' )
256 def channel ( channel_id
):
257 if not re
. match ( r
"(UC[A-Za-z0-9_-] {22} )" , channel_id
):
258 return "bad channel id" , 400 # todo
260 xmlfeed
= fetch_xml ( "channel_id" , channel_id
)
262 return "not found or something" , 404 # XXX
263 ( title
, author
, _
, videos
) = parse_xml ( xmlfeed
)
264 return render_template ( 'xmlfeed.html.j2' , title
= author
, rows
= videos
)
266 @app . route ( '/playlist' )
268 playlist_id
= request
. args
. get ( 'list' )
270 return "bad list id" , 400 # todo
272 xmlfeed
= fetch_xml ( "playlist_id" , playlist_id
)
274 return "not found or something" , 404 # XXX
275 ( title
, author
, _
, videos
) = parse_xml ( xmlfeed
)
276 return render_template ( 'xmlfeed.html.j2' , title
= f
" {title} by {author} " , rows
= videos
)
278 @app . route ( '/subscription_manager' )
279 def subscription_manager ():
280 token
= request
. args
. get ( 'token' , 'guest' )
281 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
282 #with conn.cursor() as c:
285 SELECT subscriptions.channel_id, name,
286 (subscribed_until < datetime('now')) AS obsolete
288 left JOIN channels ON channels.id = subscriptions.channel_id
289 left JOIN websub ON channels.id = websub.channel_id
291 ORDER BY obsolete=0, name COLLATE NOCASE ASC""" , ( token
,))
293 'channel_id' : channel_id
,
294 'author' : author
or channel_id
,
295 'subscribed_until' : subscribed_until
296 } for ( channel_id
, author
, subscribed_until
) in c
. fetchall ()]
297 return render_template ( 'subscription_manager.html.j2' , rows
= rows
)
299 @app . route ( '/feed/subscriptions' , methods
=[ 'POST' ])
301 token
= request
. args
. get ( 'token' , 'guest' )
302 if token
== 'guest' : return "guest user is read-only" , 403
303 action
= next ( request
. form
. keys (), None )
304 if action
in [ 'pin' , 'unpin' , 'hide' ]:
305 video_id
= request
. form
. get ( action
)
311 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
312 #with conn.cursor() as c:
315 INSERT OR REPLACE INTO flags (user, video_id, display)
317 """ , ( token
, video_id
, display
))
319 flash (( "error" , "unsupported action" ))
320 return redirect ( request
. url
, code
= 303 )
322 @app . route ( '/subscription_manager' , methods
=[ 'POST' ])
323 def manage_subscriptions ():
324 token
= request
. args
. get ( 'token' , 'guest' )
325 if token
== 'guest' : return "guest user is read-only" , 403
326 if 'subscribe' in request
. form
:
327 channel_id
= request
. form
. get ( "subscribe" )
328 match
= re
. match ( r
"(UC[A-Za-z0-9_-] {22} )" , channel_id
)
330 channel_id
= match
. group ( 1 )
332 match
= re
. match ( r
"((?:PL|LL|EC|UU|FL|UL|OL)[A-Za-z0-9_-]{10,})" , channel_id
)
333 if match
: # NOTE: PL-playlists are 32chars, others differ in length.
334 flash (( "error" , "playlists not (yet?) supported." ))
335 return redirect ( request
. url
, code
= 303 ) # TODO: dedup redirection
337 flash (( "error" , "not a valid/subscribable URI" ))
338 return redirect ( request
. url
, code
= 303 ) # TODO: dedup redirection
339 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
340 #with conn.cursor() as c:
343 INSERT OR IGNORE INTO subscriptions (user, channel_id)
345 """ , ( token
, channel_id
))
346 # TODO: sql-error-handling, asynchronously calling update-subs.pl
348 elif 'unsubscribe' in request
. form
:
349 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
350 #with conn.cursor() as c:
353 DELETE FROM subscriptions
354 WHERE user = ? AND channel_id = ?
355 """ , ( token
, channel_id
))
356 # TODO: sql-error-handling, report success
359 flash (( "error" , "unsupported action" ))
361 return redirect ( request
. url
, code
= 303 )
366 @app . route ( '/r/<subreddit>' )
367 def reddit ( subreddit
= "videos" ):
368 count
= int ( request
. args
. get ( 'count' , 0 ))
369 before
= request
. args
. get ( 'before' )
370 after
= request
. args
. get ( 'after' )
371 query
= '&' . join ([ f
" {k} = {v} " for k
, v
in [( 'count' , count
), ( 'before' , before
), ( 'after' , after
)] if v
])
372 r
= requests
. get ( f
"https://old.reddit.com/r/ {subreddit} .json? {query} " , headers
={ 'User-Agent' : 'Mozilla/5.0' })
373 if not r
. ok
or not 'data' in r
. json ():
374 return r
. text
+ "error retrieving reddit data" , 502
376 good
= [ e
for e
in r
. json ()[ 'data' ][ 'children' ] if e
[ 'data' ][ 'score' ] > 1 ]
377 bad
= [ e
for e
in r
. json ()[ 'data' ][ 'children' ] if e
[ 'data' ][ 'score' ] <= 1 ]
379 for entry
in ( good
+ bad
):
381 if e
[ 'domain' ] not in [ 'youtube.com' , 'youtu.be' , 'invidio.us' ]:
383 video_id
= re
. match ( r
'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&)?v=|youtu.be/|youtube.com/embed/)([-_0-9A-Za-z]+)' , e
[ 'url' ]). group ( 1 )
384 if not video_id
: continue
386 'video_id' : video_id
,
388 'url' : e
[ 'permalink' ],
389 'n_comments' : e
[ 'num_comments' ],
390 'n_karma' : e
[ 'score' ],
392 before
= r
. json ()[ 'data' ][ 'before' ]
393 after
= r
. json ()[ 'data' ][ 'after' ]
394 return render_template ( 'reddit.html.j2' , subreddit
= subreddit
, rows
= videos
, before
= before
, after
= after
, count
= count
)
397 # reload cipher from database every 1 hour
398 if 'cipher' not in g
or time
. time () - g
. get ( 'cipher_updated' , 0 ) > 1 * 60 * 60 :
399 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
401 c
. execute ( "SELECT sts, algorithm FROM cipher" )
402 g
. cipher
= c
. fetchone ()
403 g
. cipher_updated
= time
. time ()
407 #@app.teardown_appcontext
409 # db = g.pop('db', None)
414 @app . template_filter ( 'format_date' )
416 ( y
, m
, d
) = ( int ( n
) for n
in s
. split ( 'T' )[ 0 ]. split ( ' ' )[ 0 ]. split ( '-' )) # iso-dates can separate date from time with space or 'T'
417 M
= '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec' . split ()
421 from pprint
import pprint
423 pprint ( args
, stream
= codecs
. getwriter ( "utf-8" )( sys
. stderr
. buffer ))
425 if __name__
== '__main__' :