]> git.gir.st - subscriptionfeed.git/blob - app/frontend.py
some error handling improvements
[subscriptionfeed.git] / app / frontend.py
1 import re
2 import time
3 import hmac
4 import hashlib
5 import sqlite3
6 import secrets
7 import requests
8 import requests_cache
9 from urllib.parse import parse_qs
10 from flask import Flask, render_template, request, redirect, flash, url_for, jsonify, g
11
12 from common import *
13
# The Flask application object; every route and hook below registers on it.
app = Flask(__name__)
# XXX: generate and hard-code, or cookies and csrf-validation will fail!
# (the key is regenerated on every process start)
app.secret_key = secrets.token_bytes(16)

# Note: currently expiring after 10 minutes. googlevideo-urls are valid for
# 5h59m, but this makes reddit very stale and premiere videos won't start.
requests_cache.install_cache(
    backend='memory',
    expire_after=10*60,
    allowable_codes=(200,),
)
18
# Note: this should only be required for the 'memory' backed cache.
from threading import Timer


def purge_cache(sec):
    """Remove expired responses from the requests cache, then re-arm itself.

    Each call schedules the next call `sec` seconds later on a daemon
    timer thread, so the purge keeps repeating for the life of the process
    without preventing interpreter shutdown.

    :param sec: interval between two purges, in seconds.
    """
    requests_cache.remove_expired_responses()
    t = Timer(sec, purge_cache, args=(sec,))
    t.daemon = True  # attribute form; Thread.setDaemon() is deprecated
    t.start()


purge_cache(10*60)
27
@app.route('/')
def index():
    """Redirect the site root to the subscription feed (HTTP 302)."""
    feed_url = url_for('feed')
    return redirect(feed_url, code=302)
31
@app.route('/feed/subscriptions')
def feed():
    """Render one page (36 videos) of the subscription feed.

    Query parameters:
      token -- user whose subscriptions and flags are used (default 'guest').
      page  -- zero-based page number; anything that is not a non-negative
               integer is treated as page 0.

    Videos flagged 'hidden' for the user are left out; pinned videos sort
    first, the rest by crawl time, newest first.
    """
    token = request.args.get('token', 'guest')
    # type=int returns the default on a failed conversion, so a malformed
    # ?page= no longer raises ValueError (which surfaced as HTTP 500).
    page = max(request.args.get('page', 0, type=int), 0)
    with sqlite3.connect(cf['global']['database']) as conn:
        c = conn.cursor()
        c.execute("""
            SELECT videos.id, channel_id, name, title, published, flags.display
              FROM videos
              JOIN channels ON videos.channel_id = channels.id
              LEFT JOIN flags ON (videos.id = flags.video_id) AND (flags.user = ?)
             WHERE channel_id IN
                   (SELECT channel_id FROM subscriptions WHERE user = ?)
               AND flags.display IS NOT 'hidden'
             ORDER BY (display = 'pinned') DESC, crawled DESC
             LIMIT 36
            OFFSET 36*?""", (token, token, page))
        rows = [{
            'video_id': video_id,
            'channel_id': channel_id,
            'author': author,
            'title': title,
            'published': published,
            'pinned': display == 'pinned',
        } for (video_id, channel_id, author, title, published, display) in c.fetchall()]
    return render_template('index.html.j2', rows=rows, page=page)
58
@app.route('/watch')
def watch():
    """Serve a single video, selected with ?v=<video id>.

    The optional ?show= parameter picks the representation:
      raw  -- redirect to the extracted video url, or bounce to Invidious
              via a Refresh header when extraction reported an error.
      json -- the metadata as JSON.
      else -- the HTML watch page; an extraction error is flashed.

    'initial' and 'player' errors from get_video_info() are returned as a
    plain-text 400 regardless of ?show=.
    """
    if 'v' not in request.args:
        return "missing video id", 400

    plaintextheader = {
        'content-type': 'text/plain',
        "Link": "<data:text/css,body%7Bcolor:%23eee;background:%23333%7D>; rel=stylesheet;",
    }

    video_id = request.args.get('v')
    sts, algo = get_cipher()
    video_url, metadata, error_type, error = get_video_info(video_id, sts, algo)
    if error_type in ['initial', 'player']:
        return error, 400, plaintextheader

    show = request.args.get("show")

    if show == "json":
        return jsonify(metadata)

    if show == "raw":
        if not error:
            return redirect(video_url, code=307)
        extra = {'geolocked':'local=1', 'livestream':'raw=0'}.get(error,'')
        # if error==exhausted, metadata.playabilityStatus.reason may contain additional information.
        body = f"{error.upper()}: Redirecting to Invidious."
        headers = {'Refresh': f'2; URL=https://invidio.us/watch?v={video_id}&{extra}&raw=1', **plaintextheader}
        return body, 502, headers

    # todo: handle geolocked, livestream and the case when we have an
    # exhausted error with no metadata returned
    if error:
        err_desc = {'geolocked': "this video is geolocked", 'livestream': "livestreams not yet supported", 'exhausted': "couldn't extract video urls"}.get(error,'')
        flash(("error",f"{err_desc}. Watch on <a href='https://invidio.us/watch?v={video_id}'>Invidious</a> or <a href='https://www.youtube.com/watch?v={video_id}'>Youtube</a>")) # todo: cleanup
    return render_template('watch.html.j2', video_id=video_id, video_url=video_url, **prepare_metadata(metadata))
86
@app.route('/channel/<channel_id>')
def channel(channel_id):
    """Render the video list of one channel from its XML feed.

    :param channel_id: must be exactly 'UC' followed by 22 characters from
        [A-Za-z0-9_-]; anything else yields a 400.
    :returns: the rendered feed page, 400 for a malformed id, or 404 when
        fetch_xml() returns nothing.
    """
    # fullmatch, not match: match() only anchors the start, so an id with
    # arbitrary trailing characters used to pass and reach fetch_xml().
    if not re.fullmatch(r"UC[A-Za-z0-9_-]{22}", channel_id):
        return "bad channel id", 400 # todo

    xmlfeed = fetch_xml("channel_id", channel_id)
    if not xmlfeed:
        return "not found or something", 404 # XXX
    (_, author, _, videos) = parse_xml(xmlfeed)
    return render_template('xmlfeed.html.j2', title=author, rows=videos)
97
@app.route('/playlist')
def playlist():
    """Render the video list of the playlist given as ?list=<playlist id>.

    Responds with 400 when the parameter is missing or empty and with 404
    when fetch_xml() returns nothing.
    """
    playlist_id = request.args.get('list')
    if not playlist_id:
        return "bad list id", 400 # todo

    xmlfeed = fetch_xml("playlist_id", playlist_id)
    if not xmlfeed:
        return "not found or something", 404 # XXX

    title, author, _, videos = parse_xml(xmlfeed)
    heading = f"{title} by {author}"
    return render_template('xmlfeed.html.j2', title=heading, rows=videos)
109
@app.route('/subscription_manager')
def subscription_manager():
    """List the subscriptions of the user given by ?token= (default 'guest').

    Each row carries the channel id, the channel name (falling back to the
    id when the name is missing) and, under the key 'subscribed_until', the
    query's `obsolete` column, i.e. whether websub's subscribed_until lies
    in the past.
    """
    token = request.args.get('token', 'guest')
    query = """
        SELECT subscriptions.channel_id, name,
               (subscribed_until < datetime('now')) AS obsolete
          FROM subscriptions
          left JOIN channels ON channels.id = subscriptions.channel_id
          left JOIN websub ON channels.id = websub.channel_id
         WHERE user = ?
         ORDER BY obsolete=0, name COLLATE NOCASE ASC"""
    with sqlite3.connect(cf['global']['database']) as conn:
        cursor = conn.cursor()
        cursor.execute(query, (token,))
        rows = []
        for channel_id, author, subscribed_until in cursor.fetchall():
            rows.append({
                'channel_id': channel_id,
                'author': author or channel_id,
                'subscribed_until': subscribed_until
            })
    return render_template('subscription_manager.html.j2', rows=rows)
130
@app.route('/feed/subscriptions', methods=['POST'])
def feed_post():
    """Pin, unpin or hide a video for the user given by ?token=.

    The first key of the submitted form names the action ('pin', 'unpin'
    or 'hide') and its value is the video id. The 'guest' user is
    read-only (403). Any other action flashes an error. The response is
    always a 303 redirect back to the same url.
    """
    token = request.args.get('token', 'guest')
    if token == 'guest':
        return "guest user is read-only", 403
    # iter() makes this work whether the form's keys() is an iterator or a
    # view object; next() on a view raises TypeError.
    action = next(iter(request.form), None)
    if action in ['pin', 'unpin', 'hide']:
        video_id = request.form.get(action)
        display = {
            'pin': 'pinned',
            'unpin': None,
            'hide': 'hidden',
        }[action]
        with sqlite3.connect(cf['global']['database']) as conn:
            c = conn.cursor()
            c.execute("""
                INSERT OR REPLACE INTO flags (user, video_id, display)
                VALUES (?, ?, ?)
            """, (token, video_id, display))
    else:
        flash(("error","unsupported action"))
    return redirect(request.url, code=303)
153
@app.route('/subscription_manager', methods=['POST'])
def manage_subscriptions():
    """Subscribe to or unsubscribe from a channel for the user in ?token=.

    Form fields:
      subscribe   -- value must start with a channel id ('UC' + 22 chars);
                     playlist ids and anything else are rejected with a
                     flashed error.
      unsubscribe -- channel id to remove from the user's subscriptions.

    The 'guest' user is read-only (403). Every other path ends in a 303
    redirect back to the same url.
    """
    token = request.args.get('token', 'guest')
    if token == 'guest':
        return "guest user is read-only", 403

    if 'subscribe' in request.form:
        channel_id = request.form.get("subscribe")
        match = re.match(r"(UC[A-Za-z0-9_-]{22})", channel_id)
        if match:
            channel_id = match.group(1)
        else:
            match = re.match(r"((?:PL|LL|EC|UU|FL|UL|OL)[A-Za-z0-9_-]{10,})", channel_id)
            if match: # NOTE: PL-playlists are 32chars, others differ in length.
                flash(("error","playlists not (yet?) supported."))
            else:
                flash(("error","not a valid/subscribable URI"))
            return redirect(request.url, code=303)
        with sqlite3.connect(cf['global']['database']) as conn:
            c = conn.cursor()
            c.execute("""
                INSERT OR IGNORE INTO subscriptions (user, channel_id)
                VALUES (?, ?)
            """, (token, channel_id))
            # TODO: sql-error-handling, asynchronically calling update-subs.pl

    elif 'unsubscribe' in request.form:
        # channel_id was never assigned in this branch before, so every
        # unsubscribe request died with UnboundLocalError.
        channel_id = request.form.get("unsubscribe")
        with sqlite3.connect(cf['global']['database']) as conn:
            c = conn.cursor()
            c.execute("""
                DELETE FROM subscriptions
                WHERE user = ? AND channel_id = ?
            """, (token, channel_id))
            # TODO: sql-error-handling, report success

    else:
        flash(("error","unsupported action"))

    return redirect(request.url, code=303)
194
@app.route('/r/')
def reddit_index():
    """Placeholder for the bare /r/ url; responds with an empty body."""
    empty_body = ""
    return empty_body
@app.route('/r/<subreddit>')
def reddit(subreddit="videos"):
    """List the youtube videos posted to a subreddit.

    Fetches the subreddit's JSON listing from old.reddit.com, passing the
    count/before/after paging parameters through. Only posts whose domain
    is youtube.com, youtu.be or invidio.us and whose url contains a
    recognizable video id are kept. Posts with a score above 1 are listed
    before the others.

    :returns: the rendered listing, or a 502 when reddit's answer is not
        usable.
    """
    # A non-numeric ?count= falls back to 0 instead of raising ValueError.
    count = request.args.get('count', 0, type=int)
    before = request.args.get('before')
    after = request.args.get('after')
    query = '&'.join([f"{k}={v}" for k,v in [('count',count), ('before',before), ('after',after)] if v])
    r = requests.get(f"https://old.reddit.com/r/{subreddit}.json?{query}", headers={'User-Agent':'Mozilla/5.0'})

    # Parse the body once; a non-JSON body is an upstream error, not a crash.
    try:
        listing = r.json() if r.ok else None
    except ValueError:
        listing = None
    if not isinstance(listing, dict) or 'data' not in listing:
        return r.text+"error retrieving reddit data", 502

    children = listing['data']['children']
    good = [e for e in children if e['data']['score'] > 1]
    bad = [e for e in children if e['data']['score'] <=1]
    videos = []
    for entry in (good+bad):
        e = entry['data']
        if e['domain'] not in ['youtube.com', 'youtu.be', 'invidio.us']:
            continue
        # Check the match object itself: calling .group() on a failed match
        # raised AttributeError for any url the pattern does not cover.
        found = re.match(r'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&amp;)?v=|youtu.be/|youtube.com/embed/)([-_0-9A-Za-z]+)', e['url'])
        if not found:
            continue
        videos.append({
            'video_id': found.group(1),
            'title': e['title'],
            'url': e['permalink'],
            'n_comments': e['num_comments'],
            'n_karma': e['score'],
        })
    before = listing['data']['before']
    after = listing['data']['after']
    return render_template('reddit.html.j2', subreddit=subreddit, rows=videos, before=before, after=after, count=count)
227
# Process-wide cache for get_cipher(). It deliberately does not live on
# flask.g: per Flask's documentation g is not meant for storing data between
# requests, so a cache kept there is reloaded on every request.
_cipher_cache = {'value': None, 'updated': 0}


def get_cipher():
    """Return the (sts, algorithm) row of the `cipher` table.

    The row is read from the database at most once per hour and served
    from a module-level cache in between. While the table yields no row,
    the query is retried on every call.

    :returns: the row from cursor.fetchone(), or None when the table is
        empty.
    """
    now = time.time()
    # reload cipher from database every 1 hour
    if _cipher_cache['value'] is None or now - _cipher_cache['updated'] > 1 * 60 * 60:
        with sqlite3.connect(cf['global']['database']) as conn:
            c = conn.cursor()
            c.execute("SELECT sts, algorithm FROM cipher")
            _cipher_cache['value'] = c.fetchone()
        _cipher_cache['updated'] = now

    return _cipher_cache['value']
238
239 #@app.teardown_appcontext
240 #def teardown_db():
241 # db = g.pop('db', None)
242 #
243 # if db is not None:
244 # db.close()
245
246 # Magic CSRF protection: This modifies outgoing HTML responses and injects a csrf token into all forms.
247 # All post requests are then checked if they contain the valid token.
248 # TODO:
249 # - don't use regex for injecting
250 # - inject a http header into all responses (that could be used by apis)
251 # - allow csrf token to be passed in http header, json, ...
252 # - a decorator on routes to opt out of verification or output munging
@app.after_request
def add_csrf_protection(response):
    """Inject a hidden csrf input into every <form> of an HTML response.

    The token is an HMAC-SHA256 of the client's remote address, keyed with
    the app's secret key. Responses whose mimetype is not text/html pass
    through unchanged.
    """
    if response.mimetype != "text/html":
        return response

    # TODO: will fail behind reverse proxy (remote_addr always localhost)
    client_addr = request.remote_addr.encode('ascii')
    token = hmac.new(app.secret_key, client_addr, hashlib.sha256).hexdigest()

    # match form tags with any number of attributes and any type of quotes
    form_tag = rb'''(<[Ff][Oo][Rr][Mm](\s+[a-zA-Z0-9-]+(=(\w*|'[^']*'|"[^"]*"))?)*>)'''
    # hackily append a hidden input with our csrf protection value
    with_hidden_input = rb'\1<input type="hidden" name="csrf" value="'+token.encode('ascii')+rb'">'

    patched_body = re.sub(form_tag, with_hidden_input, response.get_data())
    response.set_data(patched_body)
    return response
@app.before_request
def verify_csrf_protection():
    """Reject POST requests that lack the expected csrf form field.

    The expected token is an HMAC-SHA256 of the client's remote address,
    keyed with the app's secret key. On a mismatch the request is answered
    with a 400. Otherwise the csrf field is removed from request.form so
    the view functions never see it.
    """
    # TODO: will fail behind reverse proxy (remote_addr always localhost)
    token = hmac.new(app.secret_key, request.remote_addr.encode('ascii'), hashlib.sha256).hexdigest()
    if request.method == "POST":
        submitted = request.form.get('csrf', '')
        # Constant-time comparison; compared as bytes because
        # compare_digest() only accepts ASCII-only str arguments.
        if not hmac.compare_digest(submitted.encode('utf-8'), token.encode('ascii')):
            return "CSRF validation failed!", 400
    request.form = request.form.copy() # make it mutable
    request.form.poplist('csrf') # remove our csrf again
269
@app.template_filter('format_date')
def format_date(s):
    """Template filter: turn an ISO-style date(time) string into 'D Mon'.

    Only the date part is used; the year is parsed but not shown.
    """
    # iso-dates can separate date from time with space or 'T'
    date_part = s.split('T')[0].split(' ')[0]
    year, month, day = (int(field) for field in date_part.split('-'))
    # leading '_' pads index 0 so that month numbers index directly
    month_names = '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
    return f"{day} {month_names[month]}"
275
# Script entry point: start Flask's built-in server with debug mode enabled.
if __name__ == '__main__':
    app.run(debug=True)
Imprint / Impressum