]>
git.gir.st - subscriptionfeed.git/blob - app/frontend.py
10 from urllib
. parse
import parse_qs
11 from flask
import Flask
, render_template
, request
, redirect
, flash
, url_for
, jsonify
, g
16 app
. secret_key
= secrets
. token_bytes ( 16 ) # XXX: generate and hard-code, or cookies and csrf-validation will fail!
17 # Note: currently expiring after 10 minutes. googlevideo-urls are valid for 5h59m, but this makes reddit very stale and premiere videos won't start.
18 requests_cache
. install_cache ( backend
= 'memory' , expire_after
= 10 * 60 , allowable_codes
=( 200 ,))
20 # Note: this should only be required for the 'memory' backed cache.
21 from threading
import Timer
23 requests_cache
. remove_expired_responses ()
24 t
= Timer ( sec
, purge_cache
, args
=( sec
,))
31 return redirect ( url_for ( 'feed' ), code
= 302 )
33 @app . route ( '/feed/subscriptions' )
35 token
= request
. args
. get ( 'token' , 'guest' )
36 page
= int ( request
. args
. get ( 'page' , 0 ))
37 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
40 SELECT videos.id, channel_id, name, title, published, flags.display
42 JOIN channels ON videos.channel_id = channels.id
43 LEFT JOIN flags ON (videos.id = flags.video_id) AND (flags.user = ?)
45 (SELECT channel_id FROM subscriptions WHERE user = ?)
46 AND flags.display IS NOT 'hidden'
47 ORDER BY (display = 'pinned') DESC, crawled DESC
49 OFFSET 36*?""" , ( token
, token
, page
))
52 'channel_id' : channel_id
,
55 'published' : published
,
56 'pinned' : display
== 'pinned' ,
57 } for ( video_id
, channel_id
, author
, title
, published
, display
) in c
. fetchall ()]
58 return render_template ( 'index.html.j2' , rows
= rows
, page
= page
)
62 if not 'v' in request
. args
:
63 return "missing video id" , 400
65 plaintextheader
= { 'content-type' : 'text/plain' , "Link" : "<data:text/css,body%7Bcolor:%23eee;background:%23333%7D>; rel=stylesheet;" }
67 video_id
= request
. args
. get ( 'v' )
68 ( video_url
, metadata
, error_type
, error
) = get_video_info ( video_id
)
69 if error_type
in [ 'initial' , 'player' ]:
70 return error
, 400 , plaintextheader
72 show
= request
. args
. get ( "show" )
75 extra
= { 'geolocked' : 'local=1' , 'livestream' : 'raw=0' }. get ( error
, '' )
76 # if error==exhausted, metadata.playabilityStatus.reason may contain additional information.
77 return f
"{error.upper()}: Redirecting to Invidious." , 502 , { 'Refresh' : f
'2; URL=https://invidio.us/watch?v= {video_id} & {extra} &raw=1' , ** plaintextheader
}
78 return redirect ( video_url
, code
= 307 )
80 return jsonify ( metadata
)
81 else : # todo: handle geolocked, livesteam and the case when we have an exhausted error with no metadata returned
82 return render_template ( 'watch.html.j2' , video_id
= video_id
, video_url
= video_url
, ** prepare_metadata ( metadata
))
84 def prepare_metadata ( metadata
):
85 meta1
= metadata
[ 'videoDetails' ]
86 meta2
= metadata
[ 'microformat' ][ 'playerMicroformatRenderer' ]
87 cards
= metadata
[ 'cards' ][ 'cardCollectionRenderer' ][ 'cards' ] if 'cards' in metadata
else []
88 endsc
= metadata
[ 'endscreen' ][ 'endscreenRenderer' ][ 'elements' ] if 'endscreen' in metadata
else []
90 #aspect_ratio = meta2['embed']['width'] / meta2['embed']['height'], # sometimes absent
91 aspect_ratio
= meta2
[ 'thumbnail' ][ 'thumbnails' ][ 0 ][ 'width' ] / meta2
[ 'thumbnail' ][ 'thumbnails' ][ 0 ][ 'height' ]
95 'code' : cc
[ 'languageCode' ],
96 'autogenerated' : cc
. get ( 'kind' )== "asr" ,
97 'name' : cc
[ 'name' ][ 'simpleText' ]}
98 for cc
in metadata
[ 'captions' ][ 'playerCaptionsTracklistRenderer' ][ 'captionTracks' ]
99 ], key
= lambda cc
: cc
[ 'autogenerated' ]) if 'captionTracks' in metadata
[ 'captions' ][ 'playerCaptionsTracklistRenderer' ] else []
101 def parse_infocard ( card
):
102 card
= card
[ 'cardRenderer' ]
103 teaser
= card
[ 'teaser' ][ 'simpleCardTeaserRenderer' ][ 'message' ][ 'simpleText' ] # not used
104 ctype
= list ( card
[ 'content' ]. keys ())[ 0 ]
105 content
= card
[ 'content' ][ ctype
]
106 if ctype
== "pollRenderer" :
109 'question' : content
[ 'question' ][ 'simpleText' ],
110 'answers' : [( a
[ 'text' ][ 'simpleText' ], a
[ 'numVotes' ]) for a
in content
[ 'choices' ]],
112 elif ctype
== "videoInfoCardContentRenderer" :
115 'video_id' : content
[ 'action' ][ 'watchEndpoint' ][ 'videoId' ],
116 'title' : content
[ 'videoTitle' ][ 'simpleText' ],
117 'author' : content
[ 'channelName' ][ 'simpleText' ], # 'by xXxXx'
118 'length' : content
[ 'lengthString' ][ 'simpleText' ], # '23:03'
119 'views' : content
[ 'viewCountText' ][ 'simpleText' ], # '421,248 views'
121 elif ctype
== "playlistInfoCardContentRenderer" :
124 'playlist_id' : content
[ 'action' ][ 'watchEndpoint' ][ 'playlistId' ],
125 'video_id' : content
[ 'action' ][ 'watchEndpoint' ][ 'videoId' ],
126 'title' : content
[ 'playlistTitle' ][ 'simpleText' ],
127 'author' : content
[ 'channelName' ][ 'simpleText' ],
128 'n_videos' : content
[ 'playlistVideoCount' ][ 'simpleText' ], # '21'
130 elif ctype
== "simpleCardContentRenderer" and 'urlEndpoint' in content
. get ( 'command' ,{}). keys ():
133 'url' : parse_qs ( content
[ 'command' ][ 'urlEndpoint' ][ 'url' ]. split ( '?' )[ 1 ])[ 'q' ][ 0 ],
134 'domain' : content
[ 'displayDomain' ][ 'simpleText' ],
135 'title' : content
[ 'title' ][ 'simpleText' ],
136 'text' : content
[ 'actionButton' ][ 'simpleCardButtonRenderer' ][ 'text' ][ 'simpleText' ],
140 content
= { 'error' : f
" {ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>" }
142 return { 'teaser' : teaser
, 'type' : ctype
, 'content' : content
}
144 def parse_endcard ( card
):
145 card
= card
[ 'endscreenElementRenderer' ] if 'endscreenElementRenderer' in card
. keys () else card
146 ctype
= card
[ 'style' ]
147 if ctype
== "CHANNEL" :
149 'channel_id' : card
[ 'endpoint' ][ 'browseEndpoint' ][ 'browseId' ],
150 'title' : card
[ 'title' ][ 'simpleText' ],
151 'icons' : { e
[ 'height' ]: e
[ 'url' ] for e
in card
[ 'image' ][ 'thumbnails' ]},
153 elif ctype
== "VIDEO" :
155 'video_id' : card
[ 'endpoint' ][ 'watchEndpoint' ][ 'videoId' ],
156 'title' : card
[ 'title' ][ 'simpleText' ],
157 'length' : card
[ 'videoDuration' ][ 'simpleText' ], # '12:21'
158 'views' : card
[ 'metadata' ][ 'simpleText' ], # '51,649 views'
160 elif ctype
== "PLAYLIST" :
162 'playlist_id' : card
[ 'endpoint' ][ 'watchEndpoint' ][ 'playlistId' ],
163 'video_id' : card
[ 'endpoint' ][ 'watchEndpoint' ][ 'videoId' ],
164 'title' : card
[ 'title' ][ 'simpleText' ],
165 'author' : card
[ 'metadata' ][ 'simpleText' ],
166 'n_videos' : card
[ 'playlistLength' ][ 'simpleText' ]. replace ( " videos" , "" ),
168 elif ctype
== "WEBSITE" :
170 'url' : parse_qs ( card
[ 'endpoint' ][ 'urlEndpoint' ][ 'url' ]. split ( '?' )[ 1 ])[ 'q' ][ 0 ],
171 'domain' : card
[ 'metadata' ][ 'simpleText' ],
172 'title' : card
[ 'title' ][ 'simpleText' ],
173 'icons' : { e
[ 'height' ]: e
[ 'url' ] for e
in card
[ 'image' ][ 'thumbnails' ]},
177 content
= { 'error' : f
" {ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>" }
179 return { 'type' : ctype
, 'content' : content
}
182 'title' : meta1
[ 'title' ],
183 'author' : meta1
[ 'author' ],
184 'channel_id' : meta1
[ 'channelId' ],
185 'description' : meta1
[ 'shortDescription' ],
186 'published' : meta2
[ 'publishDate' ],
187 'views' : meta1
[ 'viewCount' ],
188 'length' : int ( meta1
[ 'lengthSeconds' ]),
189 'rating' : meta1
[ 'averageRating' ],
190 'category' : meta2
[ 'category' ],
191 'aspectr' : aspect_ratio
,
192 'unlisted' : meta2
[ 'isUnlisted' ],
193 'countries' : meta2
[ 'availableCountries' ],
194 'poster' : meta2
[ 'thumbnail' ][ 'thumbnails' ][ 0 ][ 'url' ],
195 'infocards' : [ parse_infocard ( card
) for card
in cards
],
196 'endcards' : [ parse_endcard ( card
) for card
in endsc
],
197 'subtitles' : subtitles
,
200 def get_video_info ( video_id
):
202 returns the best-quality muxed video stream, the player_response, error-type/-message
203 error types: 'initial': the request to get_video_info was malformed
204 'player': playabilityStatus != OK
205 'internal': [livestream, geolocked, exhausted]
207 # TODO: caching, e.g. beaker? need to not cache premiering-soon videos/livestreams/etc, though
208 # responses are apparently valid for 6h; maybe cache for (video_length - 2h)
209 # TODO: error types? ["invalid parameters", playabilitystatus, own]
210 # todo: a bit messy; should return all unscrambled video urls in best->worst quality
212 # we try to fetch the video multiple times using different origins
213 ( sts
, algo
) = get_cipher ()
214 for el
in [ 'embedded' , 'detailpage' ]: # ['el-completely-absent',info,leanback,editpage,adunit,previewpage,profilepage]
215 r
= requests
. get ( f
"https://www.youtube.com/get_video_info" +
216 f
"?video_id= {video_id} " +
217 f
"&eurl=https://youtube.googleapis.com/v/ {video_id} " +
220 f
"&hl=en_US" ) #"&hl=en&gl=US"
221 params
= parse_qs ( r
. text
)
222 if 'errorcode' in params
: # status=fail
223 return None , None , 'initial' , f
"MALFORMED: {params['reason'][0]}"
225 metadata
= json
. loads ( params
. get ( 'player_response' )[ 0 ])
226 if metadata
[ 'playabilityStatus' ][ 'status' ] != "OK" :
227 if metadata
[ 'playabilityStatus' ][ 'status' ] == "UNPLAYABLE" :
228 continue # try again with different 'el' value. if none succeeds, we fall into "exhausted" path, which returns last tried metadata, from which the playabilityStatus.reason can be extracted. according to jwz/youtubedown, the worst error message comes from embedded, which is tried first, so it should be overwritten by a better message.
229 return None , None , 'player' , f
"{metadata['playabilityStatus']['status']}: {metadata['playabilityStatus']['reason']}"
230 if 'liveStreamability' in metadata
[ 'playabilityStatus' ]:
231 return None , metadata
, 'internal' , "livestream" # can also check .microformat.liveBroadcastDetails.isLiveNow
233 formats
= metadata
[ 'streamingData' ][ 'formats' ]
234 for ( i
, v
) in enumerate ( formats
):
235 if not ( 'cipher' in v
or 'signatureCipher' in v
): continue
236 cipher
= parse_qs ( v
. get ( 'cipher' ) or v
. get ( 'signatureCipher' ))
237 formats
[ i
][ 'url' ] = unscramble ( cipher
)
239 # todo: check if we have urls or try again
240 url
= sorted ( formats
, key
= lambda k
: k
[ 'height' ], reverse
= True )[ 0 ][ 'url' ]
242 if 'gcr' in parse_qs ( url
):
243 return None , metadata
, 'internal' , "geolocked"
245 return url
, metadata
, None , None
247 return None , metadata
, 'internal' , "exhausted"
249 def unscramble ( cipher
): # test video id: UxxajLWwzqY
250 signature
= list ( cipher
[ 's' ][ 0 ])
251 ( sts
, algo
) = get_cipher ()
252 for c
in algo
. split ():
253 op
, ix
= re
. match ( r
"([rsw])(\d+)?" , c
). groups ()
255 if op
== 'r' : signature
= list ( reversed ( signature
))
256 if op
== 's' : signature
= signature
[ int ( ix
):]
257 if op
== 'w' : signature
[ 0 ], signature
[ int ( ix
)% len ( signature
)] = signature
[ int ( ix
)% len ( signature
)], signature
[ 0 ]
258 sp
= cipher
. get ( 'sp' , [ 'signature' ])[ 0 ]
259 sig
= cipher
[ 'sig' ][ 0 ] if 'sig' in cipher
else '' . join ( signature
)
260 return f
"{cipher['url'][0]}& {sp} = {sig} "
262 @app . route ( '/channel/<channel_id>' )
263 def channel ( channel_id
):
264 if not re
. match ( r
"(UC[A-Za-z0-9_-] {22} )" , channel_id
):
265 return "bad channel id" , 400 # todo
267 xmlfeed
= fetch_xml ( "channel_id" , channel_id
)
269 return "not found or something" , 404 # XXX
270 ( title
, author
, _
, videos
) = parse_xml ( xmlfeed
)
271 return render_template ( 'xmlfeed.html.j2' , title
= author
, rows
= videos
)
273 @app . route ( '/playlist' )
275 playlist_id
= request
. args
. get ( 'list' )
277 return "bad list id" , 400 # todo
279 xmlfeed
= fetch_xml ( "playlist_id" , playlist_id
)
281 return "not found or something" , 404 # XXX
282 ( title
, author
, _
, videos
) = parse_xml ( xmlfeed
)
283 return render_template ( 'xmlfeed.html.j2' , title
= f
" {title} by {author} " , rows
= videos
)
285 @app . route ( '/subscription_manager' )
286 def subscription_manager ():
287 token
= request
. args
. get ( 'token' , 'guest' )
288 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
289 #with conn.cursor() as c:
292 SELECT subscriptions.channel_id, name,
293 (subscribed_until < datetime('now')) AS obsolete
295 left JOIN channels ON channels.id = subscriptions.channel_id
296 left JOIN websub ON channels.id = websub.channel_id
298 ORDER BY obsolete=0, name COLLATE NOCASE ASC""" , ( token
,))
300 'channel_id' : channel_id
,
301 'author' : author
or channel_id
,
302 'subscribed_until' : subscribed_until
303 } for ( channel_id
, author
, subscribed_until
) in c
. fetchall ()]
304 return render_template ( 'subscription_manager.html.j2' , rows
= rows
)
306 @app . route ( '/feed/subscriptions' , methods
=[ 'POST' ])
308 token
= request
. args
. get ( 'token' , 'guest' )
309 if token
== 'guest' : return "guest user is read-only" , 403
310 action
= next ( iter ( k
for k
in request
. form
. keys () if k
!= 'csrf' ), None )
311 if action
in [ 'pin' , 'unpin' , 'hide' ]:
312 video_id
= request
. form
. get ( action
)
318 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
319 #with conn.cursor() as c:
322 INSERT OR REPLACE INTO flags (user, video_id, display)
324 """ , ( token
, video_id
, display
))
326 flash (( "error" , "unsupported action" ))
327 return redirect ( request
. url
, code
= 303 )
329 @app . route ( '/subscription_manager' , methods
=[ 'POST' ])
330 def manage_subscriptions ():
331 token
= request
. args
. get ( 'token' , 'guest' )
332 if token
== 'guest' : return "guest user is read-only" , 403
333 if 'subscribe' in request
. form
:
334 channel_id
= request
. form
. get ( "subscribe" )
335 match
= re
. match ( r
"(UC[A-Za-z0-9_-] {22} )" , channel_id
)
337 channel_id
= match
. group ( 1 )
339 match
= re
. match ( r
"((?:PL|LL|EC|UU|FL|UL|OL)[A-Za-z0-9_-]{10,})" , channel_id
)
340 if match
: # NOTE: PL-playlists are 32chars, others differ in length.
341 flash (( "error" , "playlists not (yet?) supported." ))
342 return redirect ( request
. url
, code
= 303 ) # TODO: dedup redirection
344 flash (( "error" , "not a valid/subscribable URI" ))
345 return redirect ( request
. url
, code
= 303 ) # TODO: dedup redirection
346 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
347 #with conn.cursor() as c:
350 INSERT OR IGNORE INTO subscriptions (user, channel_id)
352 """ , ( token
, channel_id
))
353 # TODO: sql-error-handling, asynchronously calling update-subs.pl
355 elif 'unsubscribe' in request
. form
:
356 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
357 #with conn.cursor() as c:
360 DELETE FROM subscriptions
361 WHERE user = ? AND channel_id = ?
362 """ , ( token
, channel_id
))
363 # TODO: sql-error-handling, report success
366 flash (( "error" , "unsupported action" ))
368 return redirect ( request
. url
, code
= 303 )
373 @app . route ( '/r/<subreddit>' )
374 def reddit ( subreddit
= "videos" ):
375 count
= int ( request
. args
. get ( 'count' , 0 ))
376 before
= request
. args
. get ( 'before' )
377 after
= request
. args
. get ( 'after' )
378 query
= '&' . join ([ f
" {k} = {v} " for k
, v
in [( 'count' , count
), ( 'before' , before
), ( 'after' , after
)] if v
])
379 r
= requests
. get ( f
"https://old.reddit.com/r/ {subreddit} .json? {query} " , headers
={ 'User-Agent' : 'Mozilla/5.0' })
380 if not r
. ok
or not 'data' in r
. json ():
381 return r
. text
+ "error retrieving reddit data" , 502
383 good
= [ e
for e
in r
. json ()[ 'data' ][ 'children' ] if e
[ 'data' ][ 'score' ] > 1 ]
384 bad
= [ e
for e
in r
. json ()[ 'data' ][ 'children' ] if e
[ 'data' ][ 'score' ] <= 1 ]
386 for entry
in ( good
+ bad
):
388 if e
[ 'domain' ] not in [ 'youtube.com' , 'youtu.be' , 'invidio.us' ]:
390 video_id
= re
. match ( r
'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&)?v=|youtu.be/|youtube.com/embed/)([-_0-9A-Za-z]+)' , e
[ 'url' ]). group ( 1 )
391 if not video_id
: continue
393 'video_id' : video_id
,
395 'url' : e
[ 'permalink' ],
396 'n_comments' : e
[ 'num_comments' ],
397 'n_karma' : e
[ 'score' ],
399 before
= r
. json ()[ 'data' ][ 'before' ]
400 after
= r
. json ()[ 'data' ][ 'after' ]
401 return render_template ( 'reddit.html.j2' , subreddit
= subreddit
, rows
= videos
, before
= before
, after
= after
, count
= count
)
404 # reload cipher from database every 1 hour
405 if 'cipher' not in g
or time
. time () - g
. get ( 'cipher_updated' , 0 ) > 1 * 60 * 60 :
406 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
408 c
. execute ( "SELECT sts, algorithm FROM cipher" )
409 g
. cipher
= c
. fetchone ()
410 g
. cipher_updated
= time
. time ()
414 #@app.teardown_appcontext
416 # db = g.pop('db', None)
421 # Magic CSRF protection: This modifies outgoing HTML responses and injects a csrf token into all forms.
422 # All post requests are then checked if they contain the valid token.
424 # - don't use regex for injecting
425 # - inject a http header into all responses (that could be used by apis)
426 # - allow csrf token to be passed in http header, json, ...
427 # - a decorator on routes to opt out of verification or output munging
429 def add_csrf_protection ( response
):
430 if response
. mimetype
== "text/html" :
431 token
= hmac
. new ( app
. secret_key
, request
. remote_addr
. encode ( 'ascii' ), hashlib
. sha256
). hexdigest () # TODO: will fail behind reverse proxy (remote_addr always localhost)
432 response
. set_data ( re
. sub (
433 rb
'''(<[Ff][Oo][Rr][Mm](\s+[a-zA-Z0-9-]+(=(\w*|'[^']*'|"[^"]*"))?)*>)''' , # match form tags with any number of attributes and any type of quotes
434 rb
'\1<input type="hidden" name="csrf" value="' + token
. encode ( 'ascii' )+ rb
'">' , # hackily append a hidden input with our csrf protection value
435 response
. get_data ()))
def verify_csrf_protection():
    """Reject POST requests whose ``csrf`` form field is not the expected token.

    The expected token is the hex HMAC-SHA256 digest of the client's remote
    address, keyed with ``app.secret_key``.

    Returns:
        ``("CSRF validation failed!", 400)`` when the request method is POST
        and the submitted ``csrf`` field (missing counts as empty) does not
        match the expected token; otherwise ``None``, after replacing
        ``request.form`` with a mutable copy.
    """
    token = hmac.new(app.secret_key, request.remote_addr.encode('ascii'), hashlib.sha256).hexdigest() # TODO: will fail behind reverse proxy (remote_addr always localhost)
    if request.method == "POST":
        submitted = request.form.get('csrf', '')
        # Compare in constant time so the check does not leak how many leading
        # characters matched. Both sides are encoded to bytes because
        # compare_digest() raises TypeError on non-ASCII str input.
        if not hmac.compare_digest(submitted.encode('utf-8'), token.encode('ascii')):
            return "CSRF validation failed!", 400
    request.form = request.form.copy() # make it mutable
    # request.form.pop('csrf') # XXX: breaks all requests?!
445 @app . template_filter ( 'format_date' )
447 ( y
, m
, d
) = ( int ( n
) for n
in s
. split ( 'T' )[ 0 ]. split ( ' ' )[ 0 ]. split ( '-' )) # iso-dates can seperate date from time with space or 'T'
448 M
= '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec' . split ()
452 from pprint
import pprint
454 pprint ( args
, stream
= codecs
. getwriter ( "utf-8" )( sys
. stderr
. buffer ))
456 if __name__
== '__main__' :