From 229dbec27417f6fe8da5251b7289b0697bd75360 Mon Sep 17 00:00:00 2001 From: girst Date: Fri, 5 Mar 2021 20:08:13 +0100 Subject: [PATCH] integrate anticaptcha into common and clean it up a bit doesn't make sense as a separate blueprint; can be disabled in the config. --- app/anticaptcha/__init__.py | 23 ----- .../lib.py => common/anticaptcha.py} | 89 ++++++++++--------- app/webhooks/__init__.py | 7 ++ app/youtube/__init__.py | 19 ++-- config/setup.sql | 3 - 5 files changed, 67 insertions(+), 74 deletions(-) delete mode 100644 app/anticaptcha/__init__.py rename app/{anticaptcha/lib.py => common/anticaptcha.py} (60%) diff --git a/app/anticaptcha/__init__.py b/app/anticaptcha/__init__.py deleted file mode 100644 index 5f249d0..0000000 --- a/app/anticaptcha/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -from flask import Blueprint, request, g, url_for -from werkzeug.exceptions import NotFound -from urllib.parse import urljoin -import json -import sqlite3 -import requests - -from ..common.common import cf -from ..common.anticsrf import no_csrf_protection - -from .lib import solve_captcha - -frontend = Blueprint("anticaptcha", __name__) - -@frontend.before_app_request -def inject_header_item(): # makes a submodule accessible. 
- pass - -@frontend.route("/captcha_response/", methods=["POST"]) -@no_csrf_protection -def captcha_response(nonce): - solve_captcha(nonce, request.json) - return "", 204 diff --git a/app/anticaptcha/lib.py b/app/common/anticaptcha.py similarity index 60% rename from app/anticaptcha/lib.py rename to app/common/anticaptcha.py index 4d785ef..3dd8b26 100644 --- a/app/anticaptcha/lib.py +++ b/app/common/anticaptcha.py @@ -1,14 +1,13 @@ -from html.parser import HTMLParser -from urllib.parse import urljoin import json import secrets import sqlite3 import requests +from urllib.parse import urljoin +from html.parser import HTMLParser -from ..common.common import cf +from werkzeug.exceptions import NotFound -from werkzeug.exceptions import BadGateway -from flask import current_app +from ..common.common import cf, flask_logger class ExtractCaptcha(HTMLParser): def __init__(self, html): @@ -21,7 +20,8 @@ class ExtractCaptcha(HTMLParser): super().feed(html) def find_form(self, tag, attrs): attrs = dict(attrs) - if tag == "form" and attrs.get('action').partition('?')[0] in ["/das_captcha", "index"]: + clean_action = attrs.get('action','').partition('?')[0] + if tag == "form" and clean_action in ["/das_captcha", "index"]: self.action = attrs["action"] self.handle_starttag = self.find_values self.handle_endtag = self.find_end @@ -39,41 +39,50 @@ class ExtractCaptcha(HTMLParser): self.handle_endtag = self.find_nil def find_nil(self, *args): pass -def check_captcha_or_raise(r): +def submit_captcha(r): + # returns: + # - False if not rate limited or disabled by user + # - True if just now submitted. + # - int(seconds since last request) if already submitted + + api_key = cf['captcha']['api_key'] + api_host = cf['captcha']['api_host'] + public_uri = cf['webhooks']['public_uri'] + + if not api_key: + return False # disabled by admin + if "To continue with your YouTube experience, please fill out the form below." 
not in r.text: - return + return False with sqlite3.connect(cf['global']['database']) as conn: c = conn.cursor() # check if a captcha was already submitted recently: c.execute(""" - SELECT COUNT(*) - FROM captcha_requests - WHERE timestamp > datetime('now', '-90 seconds') - """) - (already_submitted,) = c.fetchone() - if already_submitted: - current_app.logger.warn("check_captcha_or_raise: already submitted") - # TODO: get time of last submission and display that to the user - return + SELECT (julianday('now') - julianday(timestamp)) * 86400 + FROM captcha_requests + WHERE timestamp > datetime('now', '-90 seconds') + ORDER BY timestamp DESC + LIMIT 1 + """) # Note: 90sec should work fine for capmonster, might need tweaking. + result = c.fetchone() + if result: # already submitted + (last_ago,) = result + return int(last_ago) captcha = ExtractCaptcha(r.text) nonce = secrets.token_urlsafe(16) inputs = json.dumps(captcha.inputs) + #^: {"action_recaptcha_verify2": "1", "next": "/watch?v=***&hl=en&gl=US"} - task_id = 0 # XXX: if i commit after i send the request, it fails!? # note: auto field for current datetime - # note: key, svalue only for debugging c.execute(""" - INSERT INTO captcha_requests(nonce, url, action, key, svalue, task_id, inputs) - VALUES (?, ?, ?, ?, ?, ?, ?) - """, (nonce, r.url, captcha.action, captcha.sitekey, captcha.svalue, task_id, inputs)) + INSERT INTO captcha_requests(nonce, url, action, inputs) + VALUES (?, ?, ?, ?) 
+ """, (nonce, r.url, captcha.action, inputs)) conn.commit() - api_key = cf['captcha']['api_key'] - api_host = cf['captcha']['api_host'] - public_uri = cf['webhooks']['public_uri'] r2 = requests.post(f"{api_host}/createTask", json={ "clientKey": api_key, "task": { @@ -82,39 +91,36 @@ def check_captcha_or_raise(r): "websiteKey": captcha.sitekey, "recaptchaDataSValue": captcha.svalue, }, - "callbackUrl": f"{public_uri}/captcha_response/{nonce}", + "callbackUrl": f"{public_uri}/captcha_response/v1/{nonce}", }) task_id = r2.json().get("taskId") - c.execute(""" - UPDATE OR IGNORE captcha_response - SET task_id = ? - WHERE nonce = ? - """, (task_id, nonce)) # for debugging only; task_id is not in webhook response + flask_logger(f"submitted captcha task with id {task_id}", "info") - raise BadGateway("Rate-limited by Youtube; please try again in two seconds") + return True def solve_captcha(nonce, json_obj): with sqlite3.connect(cf['global']['database']) as conn: c = conn.cursor() c.execute(""" - SELECT url, action, task_id, inputs + SELECT url, action, inputs FROM captcha_requests WHERE nonce = ? -- AND timestamp > date('now', '-90 seconds') """, (nonce,)) try: - url, action, task_id, inputs = c.fetchone() + url, action, inputs = c.fetchone() inputs = json.loads(inputs) - # note: there is no taskId in the response, so we can't verify that :| except: raise NotFound # todo: ugly solution = json_obj.get("solution", {}) inputs["g-recaptcha-response"] = solution.get("gRecaptchaResponse") - cookies = solution.get("cookies") # only set/used for "google.com domains and subdomains" - # cookies aren't preserved in r.cookies when the redirect is followed(wtf!?), and we don't need that response anyways. 
- r = requests.post(urljoin(url, action), cookies=cookies, data=inputs, allow_redirects=False) - import pickle - pickle.dump(r, open("/tmp/das-captcha.req", "wb")) + cookies = solution.get("cookies") + + r = requests.post( + urljoin(url, action), + cookies=cookies, data=inputs, + allow_redirects=False + ) captcha_cookies = r.cookies #cargo-culted from invidious, but i don't believe it's necessary @@ -126,7 +132,8 @@ def solve_captcha(nonce, json_obj): # .partition(";") # xxx: returns cookie header-value; parse to dict - c.execute("DELETE FROM captcha_cookies") # not using insert-or-replace-into to avoid keeping removed cookies + c.execute("DELETE FROM captcha_cookies") + # not using insert-or-replace-into to avoid keeping removed cookies c.executemany(""" INSERT INTO captcha_cookies(name, value) VALUES (?, ?) diff --git a/app/webhooks/__init__.py b/app/webhooks/__init__.py index 8fcaef8..38ab102 100755 --- a/app/webhooks/__init__.py +++ b/app/webhooks/__init__.py @@ -11,6 +11,7 @@ from urllib.parse import parse_qs, urlparse from ..common.common import * from ..common.anticsrf import no_csrf_protection +from ..common.anticaptcha import solve_captcha frontend = Blueprint('webhooks', __name__) @@ -72,5 +73,11 @@ def websub_post(timestamp, nonce, subject, sig): raise e return '', 200 +@frontend.route("/captcha_response/v1/", methods=["POST"]) +@no_csrf_protection +def captcha_response(nonce): + solve_captcha(nonce, request.json) + return "", 204 + if __name__ == '__main__': app().run(debug=True) diff --git a/app/youtube/__init__.py b/app/youtube/__init__.py index 394c251..bb11c98 100644 --- a/app/youtube/__init__.py +++ b/app/youtube/__init__.py @@ -9,6 +9,7 @@ from flask import Blueprint, render_template, request, redirect, flash, url_for, from werkzeug.exceptions import NotFound, BadGateway from ..common.common import * +from ..common.anticaptcha import submit_captcha from .lib import * frontend = Blueprint('youtube', __name__, @@ -106,13 +107,17 @@ def watch(): 
except: pass # if the instance is blocked, try submitting a job to the anti captcha service: - try: - if error == 'banned' and request.args.get('tobi'): - from ..anticaptcha import lib as anticaptcha - r2 = requests.get(f'https://www.youtube.com/watch?v={video_id}&hl=en&gl=US') - anticaptcha.check_captcha_or_raise(r2) - except BadGateway as e: raise e - except Exception as e: pass # unavailable (TODO: check if blueprint is loaded) + if error == 'banned' and cf['captcha']['api_key']: + r2 = requests.get(f'https://www.youtube.com/watch?v={video_id}&hl=en&gl=US') + status = submit_captcha(r2) + if status is False: + raise Exception("we are banned, but captcha wasn't triggered!") + else: + message = "right now" if status is True else f"{int(status)} seconds ago" + raise BadGateway(f""" + {errdetails} An attempt at getting unblocked has been made {message}. + Please try again in 30 seconds. + """) show = request.args.get("show") if show == "raw": diff --git a/config/setup.sql b/config/setup.sql index b47cf75..a0334c1 100644 --- a/config/setup.sql +++ b/config/setup.sql @@ -61,9 +61,6 @@ CREATE TABLE captcha_requests( nonce STRING, url STRING, action STRING, - key STRING, - svalue STRING, - task_id INTEGER, inputs STRING, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP); CREATE TABLE captcha_cookies( -- 2.39.3