From f3b2526482a75aa75b403385c3a39ef3d745fd18 Mon Sep 17 00:00:00 2001 From: girst Date: Sat, 15 Aug 2020 21:18:13 +0200 Subject: [PATCH] make csrf verification use html.parser instead of regexp finally removes the unnecessary csrf field from GET forms --- app/__init__.py | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index 9968362..26785b5 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -46,29 +46,47 @@ def log_errors(e): # Magic CSRF protection: This modifies outgoing HTML responses and injects a csrf token into all forms. # All post requests are then checked if they contain the valid token. # TODO: -# - don't use regex for injecting +# - knobs: mimetypes, http methods, form field name, token generator # - inject a http header into all responses (that could be used by apis) # - allow csrf token to be passed in http header, json, ... # - a decorator on routes to opt out of verification or output munging # https://stackoverflow.com/questions/19574694/flask-hit-decorator-before-before-request-signal-fires # - allow specifying hmac message contents (currently request.remote_addr) -import re import hmac import hashlib from flask import request +from werkzeug.exceptions import BadRequest +from html.parser import HTMLParser +@app.template_global() +def csrf_token(): + # TODO: will fail behind reverse proxy (remote_addr always localhost) + return hmac.new(app.secret_key, request.remote_addr.encode('ascii'), hashlib.sha256).hexdigest() @app.after_request def add_csrf_protection(response): if response.mimetype == "text/html": - token = hmac.new(app.secret_key, request.remote_addr.encode('ascii'), hashlib.sha256).hexdigest() # TODO: will fail behind reverse proxy (remote_addr always localhost) - response.set_data( re.sub( - rb'''(<[Ff][Oo][Rr][Mm](\s+[a-zA-Z0-9-]+(=(\w*|'[^']*'|"[^"]*"))?)*>)''', # match form tags with any number of attributes and any type of quotes - 
rb'\1', # hackily append a hidden input with our csrf protection value - response.get_data())) + csrf_elem = f'' + new_response = add_csrf(response.get_data().decode('utf-8'), csrf_elem) + response.set_data(new_response.encode('utf-8')) return response @app.before_request def verify_csrf_protection(): - token = hmac.new(app.secret_key, request.remote_addr.encode('ascii'), hashlib.sha256).hexdigest() # TODO: will fail behind reverse proxy (remote_addr always localhost) - if request.method == "POST" and request.form.get('csrf') != token: - return "CSRF validation failed!", 400 - request.form = request.form.copy() # make it mutable + if request.method == "POST" and request.form.get('csrf') != csrf_token(): + raise BadRequest("CSRF validation failed") + request.form = request.form.copy() # make it mutable request.form.poplist('csrf') # remove our csrf again +def add_csrf(html_in, csrf_elem): + class FindForms(HTMLParser): + def __init__(self, html): + super().__init__() + self.forms = [] # tuples of (line_number, tag_offset, tag_length) + super().feed(html) + def handle_starttag(self, tag, attrs): + line, offset = self.getpos() + if tag == "form" and dict(attrs).get('method','').upper() == "POST": + self.forms.append((line, offset, len(self.get_starttag_text()))) + lines = html_in.splitlines(keepends=True) + # Note: going in reverse, to not invalidate offsets: + for line, offset, length in reversed(FindForms(html_in).forms): + l = lines[line-1] + lines[line-1] = l[:offset+length] + csrf_elem + l[offset+length:] + return "".join(lines) -- 2.39.3