]> git.gir.st - subscriptionfeed.git/blob - app/frontend.py
initial commit
[subscriptionfeed.git] / app / frontend.py
import json
import re
import secrets
import sqlite3
import time
from contextlib import closing
from urllib.parse import parse_qs

import requests
import requests_cache
from flask import Flask, render_template, request, redirect, flash, url_for, jsonify, g

from common import *
12
app = Flask(__name__)
# A fresh random key is generated every time this module is loaded, so anything
# signed with the previous key stops validating after a restart.
app.secret_key = secrets.token_bytes(16)
# Note: cached responses currently expire after 10 minutes. googlevideo URLs are valid for 5h59m,
# but (presumably) caching for that long makes reddit very stale and premiere videos won't start.
requests_cache.install_cache(backend='memory', expire_after=10*60, allowable_codes=(200,))
17
# Note: this should only be required for the 'memory'-backed cache.
from threading import Timer


def purge_cache(sec):
    """Drop expired cache entries now and schedule the next purge.

    Each call removes expired responses from the requests_cache cache and
    then arms a one-shot timer that calls this function again after `sec`
    seconds, so a single initial call keeps the purge running periodically.

    Args:
        sec: delay in seconds between two purges.
    """
    requests_cache.remove_expired_responses()
    t = Timer(sec, purge_cache, args=(sec,))
    # Thread.setDaemon() is deprecated (Python 3.10+); set the attribute
    # instead. A daemon timer does not keep the process alive on shutdown.
    t.daemon = True
    t.start()


purge_cache(10*60)
26
@app.route('/')
def index():
    """Send visitors of the site root on to the subscription feed (HTTP 302)."""
    feed_url = url_for('feed')
    return redirect(feed_url, code=302)
30
@app.route('/feed/subscriptions')
def feed():
    """Render one page (36 videos) of a user's subscription feed.

    Query parameters:
        token: user identifier; defaults to 'guest'.
        page:  zero-based page number; missing, malformed or negative
               values are treated as page 0.

    Videos flagged 'hidden' by the user are left out; videos flagged
    'pinned' are listed first, the rest by crawl time (newest first).
    """
    token = request.args.get('token', 'guest')
    # type=int falls back to the default on a malformed value instead of
    # letting int() raise ValueError (which surfaced as an HTTP 500).
    page = max(0, request.args.get('page', 0, type=int))
    # sqlite3's own context manager only commits/rolls back; closing() is
    # what actually releases the connection once the rows are fetched.
    with closing(sqlite3.connect(cf['global']['database'])) as conn:
        c = conn.cursor()
        c.execute("""
            SELECT videos.id, channel_id, name, title, published, flags.display
            FROM videos
            JOIN channels ON videos.channel_id = channels.id
            LEFT JOIN flags ON (videos.id = flags.video_id) AND (flags.user = ?)
            WHERE channel_id IN
                (SELECT channel_id FROM subscriptions WHERE user = ?)
            AND flags.display IS NOT 'hidden'
            ORDER BY (display = 'pinned') DESC, crawled DESC
            LIMIT 36
            OFFSET 36*?""", (token, token, page))
        rows = [{
            'video_id': video_id,
            'channel_id': channel_id,
            'author': author,
            'title': title,
            'published': published,
            'pinned': display == 'pinned',
        } for (video_id, channel_id, author, title, published, display) in c.fetchall()]
    return render_template('index.html.j2', rows=rows, page=page)
57
@app.route('/watch')
def watch():
    """Resolve a video id to something the client can use.

    Query parameters:
        v:    video id (required; HTTP 400 without it).
        show: 'metadata' renders the watch page, 'json' dumps the raw
              metadata; anything else redirects to the video stream.

    'initial'/'player' errors from get_video_info() are answered with a
    plain-text 400. Any other error, when no `show` mode was requested,
    yields a 502 whose Refresh header forwards the client to Invidious.
    """
    if 'v' not in request.args:
        return "missing video id", 400

    video_id = request.args.get('v')
    video_url, metadata, error_type, error = get_video_info(video_id)

    # Plain-text responses get a tiny inline stylesheet via the Link header.
    plaintext_headers = {
        'content-type': 'text/plain',
        "Link": "<data:text/css,body%7Bcolor:%23eee;background:%23333%7D>; rel=stylesheet;",
    }
    if error_type in ('initial', 'player'):
        return error, 400, plaintext_headers

    show = request.args.get("show")
    if show == "metadata":  # todo: handle the case when we have an exhausted error with no metadata returned
        return render_template('watch.html.j2', video_id=video_id, video_url=video_url, **prepare_metadata(metadata))
    if show == "json":
        return jsonify(metadata)
    if not error:
        return redirect(video_url, code=307)

    # if error==exhausted, metadata.playabilityStatus.reason may contain additional information.
    extra = {'geolocked': 'local=1', 'livestream': 'raw=0'}.get(error, '')
    refresh = '2; URL=https://invidio.us/watch?v=' + video_id + '&' + extra + '&raw=1'
    return f"{error.upper()}: Redirecting to Invidious.", 502, {'Refresh': refresh, **plaintext_headers}
79
def prepare_metadata(metadata):
    """Flatten a player_response dict into the values the watch template uses.

    Args:
        metadata: decoded player_response; must contain 'videoDetails' and
            'microformat'. 'cards', 'endscreen' and 'captions' are optional.

    Returns:
        A dict with title, author, channel_id, description, published,
        views, length (int seconds), rating, category, aspectr, unlisted,
        countries, infocards, endcards and subtitles.

    Raises:
        KeyError: if one of the required fields is missing.
    """
    meta1 = metadata['videoDetails']
    meta2 = metadata['microformat']['playerMicroformatRenderer']
    cards = metadata['cards']['cardCollectionRenderer']['cards'] if 'cards' in metadata else []
    endsc = metadata['endscreen']['endscreenRenderer']['elements'] if 'endscreen' in metadata else []

    # meta2['embed'] width/height is sometimes absent, so the aspect ratio
    # is derived from the first thumbnail instead.
    thumbnail = meta2['thumbnail']['thumbnails'][0]
    aspect_ratio = thumbnail['width'] / thumbnail['height']

    # Every level is optional: without this, a response lacking 'captions'
    # raised KeyError and broke the whole page.
    caption_tracks = (
        metadata.get('captions', {})
        .get('playerCaptionsTracklistRenderer', {})
        .get('captionTracks', [])
    )
    # Stable sort on a boolean: manually created tracks come before
    # auto-generated ('asr') ones, original order otherwise preserved.
    subtitles = sorted([
        {'url': cc['baseUrl'],
         'code': cc['languageCode'],
         'autogenerated': cc.get('kind') == "asr",
         'name': cc['name']['simpleText']}
        for cc in caption_tracks
    ], key=lambda cc: cc['autogenerated'])

    def parse_infocard(card):
        """Convert one info card into {'teaser', 'type', 'content'}."""
        card = card['cardRenderer']
        teaser = card['teaser']['simpleCardTeaserRenderer']['message']['simpleText']  # not used
        # The card's single content key names its renderer type.
        ctype = list(card['content'].keys())[0]
        content = card['content'][ctype]
        if ctype == "pollRenderer":
            ctype = "POLL"
            content = {
                'question': content['question']['simpleText'],
                'answers': [(a['text']['simpleText'], a['numVotes']) for a in content['choices']],
            }
        elif ctype == "videoInfoCardContentRenderer":
            ctype = "VIDEO"
            content = {
                'video_id': content['action']['watchEndpoint']['videoId'],
                'title': content['videoTitle']['simpleText'],
                'author': content['channelName']['simpleText'],  # 'by xXxXx'
                'length': content['lengthString']['simpleText'],  # '23:03'
                'views': content['viewCountText']['simpleText'],  # '421,248 views'
            }
        elif ctype == "playlistInfoCardContentRenderer":
            ctype = "PLAYLIST"
            content = {
                'playlist_id': content['action']['watchEndpoint']['playlistId'],
                'video_id': content['action']['watchEndpoint']['videoId'],  # XXX: untested
                'title': content['playlistTitle']['simpleText'],
                'author': content['channelName']['simpleText'],
                'n_videos': content['videoCountText']['simpleText'],
            }
        elif ctype == "simpleCardContentRenderer" and 'urlEndpoint' in content.get('command', {}):
            ctype = "WEBSITE"
            content = {
                # The target is carried in the 'q' parameter of a redirect URL.
                'url': parse_qs(content['command']['urlEndpoint']['url'].split('?')[1])['q'][0],
                'title': content['title']['simpleText'],
                'text': content['actionButton']['simpleCardButtonRenderer']['text']['simpleText'],
            }
        else:
            import pprint
            content = {'error': f"{ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>"}

        return {'teaser': teaser, 'type': ctype, 'content': content}

    def parse_endcard(card):
        """Convert one end screen element into {'type', 'content'}."""
        card = card['endscreenElementRenderer'] if 'endscreenElementRenderer' in card else card
        ctype = card['style']
        if ctype == "CHANNEL":
            content = {
                'channel_id': card['endpoint']['browseEndpoint']['browseId'],
                'title': card['title']['simpleText'],
                'icons': {e['height']: e['url'] for e in card['image']['thumbnails']},
            }
        elif ctype == "VIDEO":
            content = {
                'video_id': card['endpoint']['watchEndpoint']['videoId'],
                'title': card['title']['simpleText'],
                'length': card['videoDuration']['simpleText'],  # '12:21'
                'views': card['metadata']['simpleText'],  # '51,649 views'
            }
        elif ctype == "PLAYLIST":
            content = {
                'playlist_id': card['endpoint']['watchEndpoint']['playlistId'],
                'video_id': card['endpoint']['watchEndpoint']['videoId'],
                'title': card['title']['simpleText'],
                'author': card['metadata']['simpleText'],
                'n_videos': card['playlistLength']['simpleText'],
            }
        elif ctype == "WEBSITE":
            content = {
                'url': parse_qs(card['endpoint']['urlEndpoint']['url'].split('?')[1])['q'][0],
                'title': card['title']['simpleText'],
                'icons': {e['height']: e['url'] for e in card['image']['thumbnails']},
            }
        else:
            import pprint
            content = {'error': f"{ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>"}

        return {'type': ctype, 'content': content}

    return {
        'title': meta1['title'],
        'author': meta1['author'],
        'channel_id': meta1['channelId'],
        'description': meta1['shortDescription'],
        'published': meta2['publishDate'],
        'views': meta1['viewCount'],
        'length': int(meta1['lengthSeconds']),
        'rating': meta1['averageRating'],
        'category': meta2['category'],
        'aspectr': aspect_ratio,
        'unlisted': meta2['isUnlisted'],
        'countries': meta2['availableCountries'],
        'infocards': [parse_infocard(card) for card in cards],
        'endcards': [parse_endcard(card) for card in endsc],
        'subtitles': subtitles,
    }
192
def get_video_info(video_id):
    """
    Fetch the player_response for a video and pick its best muxed stream.

    Returns a 4-tuple (url, player_response, error_type, error_message):
        url:             stream with the greatest height, or None on error
        player_response: decoded metadata; None for 'initial'/'player' errors
        error_type:      None on success, otherwise one of
            'initial':  the request to get_video_info was malformed
            'player':   playabilityStatus is neither OK nor UNPLAYABLE
            'internal': message is 'livestream', 'geolocked' or 'exhausted'
    """
    # TODO: caching, e.g. beaker? need to not cache premiering-soon videos/livestreams/etc, though
    #       responses are apparently valid for 6h; maybe cache for (video_length - 2h)
    # TODO: error types? ["invalid parameters", playabilitystatus, own]
    # todo: a bit messy; should return all unscrambled video urls in best->worst quality

    # we try to fetch the video multiple times using different origins
    sts, _algo = get_cipher()
    for el in ('embedded', 'detailpage'):  # ['el-completely-absent',info,leanback,editpage,adunit,previewpage,profilepage]
        info_url = (
            "https://www.youtube.com/get_video_info"
            f"?video_id={video_id}"
            f"&eurl=https://youtube.googleapis.com/v/{video_id}"
            f"&el={el}"
            f"&sts={sts}"
            "&hl=en_US"  # "&hl=en&gl=US"
        )
        r = requests.get(info_url)
        params = parse_qs(r.text)
        if 'errorcode' in params:  # status=fail
            return None, None, 'initial', f"MALFORMED: {params['reason'][0]}"

        metadata = json.loads(params.get('player_response')[0])
        playability = metadata['playabilityStatus']
        if playability['status'] == "UNPLAYABLE":
            # try again with a different 'el' value. if none succeeds, we end
            # up in the "exhausted" path, which returns the last tried
            # metadata, from which playabilityStatus.reason can be extracted.
            # according to jwz/youtubedown, the worst error message comes
            # from embedded, which is tried first, so it should be
            # overwritten by a better message.
            continue
        if playability['status'] != "OK":
            return None, None, 'player', f"{playability['status']}: {playability['reason']}"
        if 'liveStreamability' in playability:
            return None, metadata, 'internal', "livestream"  # can also check .microformat.liveBroadcastDetails.isLiveNow

        formats = metadata['streamingData']['formats']
        for fmt in formats:
            if 'cipher' not in fmt and 'signatureCipher' not in fmt:
                continue
            cipher = parse_qs(fmt.get('cipher') or fmt.get('signatureCipher'))
            fmt['url'] = unscramble(cipher)

        # todo: check if we have urls or try again
        by_height = sorted(formats, key=lambda f: f['height'], reverse=True)
        url = by_height[0]['url']

        if 'gcr' in parse_qs(url):
            return None, metadata, 'internal', "geolocked"

        return url, metadata, None, None

    # Every origin came back UNPLAYABLE; hand back the last metadata seen.
    return None, metadata, 'internal', "exhausted"
241
def unscramble(cipher):  # test video id: UxxajLWwzqY
    """Build a playable stream URL from a parsed (signature) cipher.

    Args:
        cipher: parse_qs() result of a format's 'cipher'/'signatureCipher'
            field. Needs 'url' and either 'sig' (used verbatim) or 's'
            (scrambled signature); 'sp' optionally names the URL parameter
            that carries the signature (default 'signature').

    Returns:
        The stream URL with the signature parameter appended.

    The algorithm obtained from get_cipher() is a whitespace separated list
    of steps: 'r' reverses the signature, 's<n>' drops the first n
    characters, 'w<n>' swaps the first character with the one at index n
    (modulo the signature length).
    """
    sp = cipher.get('sp', ['signature'])[0]
    if 'sig' in cipher:
        # A ready-made signature always won over the computed one; checking
        # first avoids requiring 's' (and the cipher lookup) needlessly.
        sig = cipher['sig'][0]
    else:
        signature = list(cipher['s'][0])
        (sts, algo) = get_cipher()
        for c in algo.split():
            step = re.match(r"([rsw])(\d+)?", c)
            if not step:
                # Skip tokens that are not a known step. The previous guard
                # ran after .groups() and so could never trigger; such a
                # token crashed with AttributeError instead.
                continue
            op, ix = step.groups()
            if op == 'r':
                signature = list(reversed(signature))
            elif op == 's':
                signature = signature[int(ix):]
            elif op == 'w':
                pos = int(ix) % len(signature)
                signature[0], signature[pos] = signature[pos], signature[0]
        sig = ''.join(signature)
    return f"{cipher['url'][0]}&{sp}={sig}"
254
@app.route('/channel/<channel_id>')
def channel(channel_id):
    """Render the videos from a channel's XML feed.

    Responds with 400 when `channel_id` is not exactly 'UC' followed by 22
    id characters, and with 404 when fetch_xml() returns nothing.
    """
    # fullmatch: re.match only anchors the start of the string, so ids with
    # arbitrary trailing text used to pass this validation.
    if not re.fullmatch(r"UC[A-Za-z0-9_-]{22}", channel_id):
        return "bad channel id", 400  # todo

    xmlfeed = fetch_xml("channel_id", channel_id)
    if not xmlfeed:
        return "not found or something", 404  # XXX
    # Only the author and the video list are shown on a channel page.
    (_, author, _, videos) = parse_xml(xmlfeed)
    return render_template('xmlfeed.html.j2', title=author, rows=videos)
265
@app.route('/playlist')
def playlist():
    """Render the videos from a playlist's XML feed.

    The playlist id is read from the `list` query parameter; a missing or
    empty id yields 400, an empty result from fetch_xml() yields 404.
    """
    list_id = request.args.get('list')
    if not list_id:
        return "bad list id", 400  # todo

    feed_xml = fetch_xml("playlist_id", list_id)
    if not feed_xml:
        return "not found or something", 404  # XXX

    title, author, _, videos = parse_xml(feed_xml)
    heading = f"{title} by {author}"
    return render_template('xmlfeed.html.j2', title=heading, rows=videos)
277
@app.route('/subscription_manager')
def subscription_manager():
    """List the channels a user is subscribed to.

    The user is taken from the `token` query parameter (default 'guest').
    Rows are ordered with obsolete subscriptions first, then by channel
    name, case-insensitively.
    """
    token = request.args.get('token', 'guest')
    # sqlite3's own context manager only commits/rolls back; closing() is
    # what actually releases the connection once the rows are fetched.
    with closing(sqlite3.connect(cf['global']['database'])) as conn:
        c = conn.cursor()
        c.execute("""
            SELECT subscriptions.channel_id, name,
                (subscribed_until < datetime('now')) AS obsolete
            FROM subscriptions
            left JOIN channels ON channels.id = subscriptions.channel_id
            left JOIN websub ON channels.id = websub.channel_id
            WHERE user = ?
            ORDER BY obsolete=0, name COLLATE NOCASE ASC""", (token,))
        rows = [{
            'channel_id': channel_id,
            # Channels without a known name are shown by their id.
            'author': author or channel_id,
            # NOTE: despite the key name, this holds the query's `obsolete`
            # flag, not a timestamp. The key is kept because the template
            # reads it.
            'subscribed_until': subscribed_until
        } for (channel_id, author, subscribed_until) in c.fetchall()]
    return render_template('subscription_manager.html.j2', rows=rows)
298
@app.route('/feed/subscriptions', methods=['POST'])
def feed_post():
    """Pin, unpin or hide a video in the user's feed.

    The first form field names the action ('pin', 'unpin' or 'hide') and
    its value is the video id. The 'guest' user may not modify anything
    (403). Any other action flashes an error. The client is then redirected
    back to the same URL with 303.
    """
    token = request.args.get('token', 'guest')
    if token == 'guest':
        return "guest user is read-only", 403

    # iter() over the form yields its keys and works whether keys() returns
    # an iterator or a view; next() on a view raises TypeError.
    action = next(iter(request.form), None)
    display_for = {
        'pin': 'pinned',
        'unpin': None,
        'hide': 'hidden',
    }
    if action in display_for:
        video_id = request.form.get(action)
        display = display_for[action]
        # closing() releases the connection; the inner `conn` context
        # commits on success and rolls back on error.
        with closing(sqlite3.connect(cf['global']['database'])) as conn, conn:
            c = conn.cursor()
            c.execute("""
                INSERT OR REPLACE INTO flags (user, video_id, display)
                VALUES (?, ?, ?)
            """, (token, video_id, display))
    else:
        flash(("error", "unsupported action"))
    return redirect(request.url, code=303)
321
@app.route('/subscription_manager', methods=['POST'])
def manage_subscriptions():
    """Subscribe the user to, or unsubscribe the user from, a channel.

    Form fields:
        subscribe:   text starting with a channel id ('UC' + 22 characters).
        unsubscribe: channel id to remove.

    The 'guest' user may not modify anything (403). Problems are reported
    through flash(). The client is then redirected back to the same URL
    with 303.
    """
    token = request.args.get('token', 'guest')
    if token == 'guest':
        return "guest user is read-only", 403

    if 'subscribe' in request.form:
        channel_id = request.form.get("subscribe")
        match = re.match(r"(UC[A-Za-z0-9_-]{22})", channel_id)
        if not match:
            # NOTE: PL-playlists are 32chars, others differ in length.
            if re.match(r"((?:PL|LL|EC|UU|FL|UL|OL)[A-Za-z0-9_-]{10,})", channel_id):
                flash(("error", "playlists not (yet?) supported."))
            else:
                flash(("error", "not a valid/subscribable URI"))
            return redirect(request.url, code=303)
        channel_id = match.group(1)
        # closing() releases the connection; the inner `conn` context
        # commits on success and rolls back on error.
        with closing(sqlite3.connect(cf['global']['database'])) as conn, conn:
            c = conn.cursor()
            c.execute("""
                INSERT OR IGNORE INTO subscriptions (user, channel_id)
                VALUES (?, ?)
            """, (token, channel_id))
        # TODO: sql-error-handling, asynchronously calling update-subs.pl

    elif 'unsubscribe' in request.form:
        # This branch previously used `channel_id` without ever assigning
        # it, so every unsubscribe request failed with a NameError.
        channel_id = request.form.get("unsubscribe")
        with closing(sqlite3.connect(cf['global']['database'])) as conn, conn:
            c = conn.cursor()
            c.execute("""
                DELETE FROM subscriptions
                WHERE user = ? AND channel_id = ?
            """, (token, channel_id))
        # TODO: sql-error-handling, report success

    else:
        flash(("error", "unsupported action"))

    return redirect(request.url, code=303)
362
@app.route('/r/')
def reddit_index():
    """Placeholder for the subreddit index; responds with an empty body."""
    return ""
@app.route('/r/<subreddit>')
def reddit(subreddit="videos"):
    """List the YouTube videos posted to a subreddit.

    Query parameters `count`, `before` and `after` are passed on to reddit
    for paging (a malformed `count` is treated as 0). Posts scoring above 1
    are listed before the others. Posts from other domains, or whose URL
    holds no recognisable video id, are skipped.

    Responds with 502 when reddit's answer is not a usable JSON listing.
    """
    # type=int falls back to the default on a malformed value instead of
    # letting int() raise ValueError (HTTP 500).
    count = request.args.get('count', 0, type=int)
    before = request.args.get('before')
    after = request.args.get('after')
    query = '&'.join([f"{k}={v}" for k, v in [('count', count), ('before', before), ('after', after)] if v])
    r = requests.get(f"https://old.reddit.com/r/{subreddit}.json?{query}", headers={'User-Agent': 'Mozilla/5.0'})

    # Decode the body once; a non-JSON body is reported as a 502 as well.
    try:
        listing = r.json() if r.ok else None
    except ValueError:
        listing = None
    if not isinstance(listing, dict) or 'data' not in listing:
        return r.text+"error retrieving reddit data", 502
    data = listing['data']

    # Stable sort on a boolean: posts with score > 1 first, original order
    # kept within each group.
    children = sorted(data['children'], key=lambda e: e['data']['score'] <= 1)
    videos = []
    for entry in children:
        e = entry['data']
        if e['domain'] not in ['youtube.com', 'youtu.be', 'invidio.us']:
            continue
        found = re.match(r'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&amp;)?v=|youtu.be/|youtube.com/embed/)([-_0-9A-Za-z]+)', e['url'])
        if not found:
            # Not a video URL (or a form the pattern does not cover);
            # calling .group() on the failed match used to crash here.
            continue
        videos.append({
            'video_id': found.group(1),
            'title': e['title'],
            'url': e['permalink'],
            'n_comments': e['num_comments'],
            'n_karma': e['score'],
        })
    # Paging cursors for the template come from reddit's answer.
    before = data['before']
    after = data['after']
    return render_template('reddit.html.j2', subreddit=subreddit, rows=videos, before=before, after=after, count=count)
395
def get_cipher():
    """Return the (sts, algorithm) row of the `cipher` table.

    The row is stored on flask.g and re-read from the database when it is
    absent or older than one hour.

    Returns:
        Whatever fetchone() yields for "SELECT sts, algorithm FROM cipher":
        a 2-tuple, or None when the table is empty.
    """
    # NOTE(review): flask.g presumably lives only for the current application
    # context, so this may cache per request rather than for an hour.
    # Confirm, and move to a module-level cache if the hour is intended.
    if 'cipher' not in g or time.time() - g.get('cipher_updated', 0) > 1 * 60 * 60:
        # sqlite3's own context manager only commits/rolls back; closing()
        # is what actually releases the connection.
        with closing(sqlite3.connect(cf['global']['database'])) as conn:
            c = conn.cursor()
            c.execute("SELECT sts, algorithm FROM cipher")
            g.cipher = c.fetchone()
            g.cipher_updated = time.time()

    return g.cipher
406
407 #@app.teardown_appcontext
408 #def teardown_db():
409 # db = g.pop('db', None)
410 #
411 # if db is not None:
412 # db.close()
413
414 @app.template_filter('format_date')
def format_date(s):
    """Format an ISO date or datetime string as '<day> <Mon>', e.g. '5 Mar'.

    Only the date part is used; the year is parsed but not shown.
    """
    # ISO dates can separate date from time with a space or 'T'.
    date_part = s.split('T')[0].split(' ')[0]
    _year, month, day = map(int, date_part.split('-'))
    # Index 0 is a placeholder so month numbers index the tuple directly.
    month_names = ('_', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    return f"{day} {month_names[month]}"
419
def pp(*args):
    """Debug helper: pretty-print the arguments (as one tuple) to stderr as UTF-8."""
    import codecs
    import sys
    from pprint import pprint

    # Wrap the raw byte stream so the output is UTF-8 whatever the locale.
    utf8_stderr = codecs.getwriter("utf-8")(sys.stderr.buffer)
    pprint(args, stream=utf8_stderr)
424
# When executed directly (not imported), start Flask's built-in server with debug mode enabled.
if __name__ == '__main__':
    app.run(debug=True)
Imprint / Impressum