]> git.gir.st - subscriptionfeed.git/blob - app/common/anticaptcha.py
cache POST requests, explicitly not cache some POSTs
[subscriptionfeed.git] / app / common / anticaptcha.py
1 import secrets
2 import sqlite3
3 import requests
4 from urllib.parse import urljoin
5 from html.parser import HTMLParser
6
7 from werkzeug.exceptions import NotFound
8
9 from ..common.common import cf, flask_logger
10
11 # NOTE: We are taking a few shortcuts here, that might bite us later:
12 # 1. we are not sending form data or request cookies
13 # 2. we are only storing the 'goojf' cookie
14 # 3. invidious has some extra cookie code for /sorry/index
15 # 4. we are expecting a response within 90 seconds (max is 5min)
16
class ExtractCaptcha(HTMLParser):
    """Pull the reCAPTCHA parameters out of a captcha interstitial page.

    The parser is a small state machine that works by swapping the
    ``handle_starttag``/``handle_endtag`` callbacks on the instance:

    1. ``find_form`` waits for the captcha ``<form>``;
    2. ``find_values`` collects the widget attributes and ``<input>``
       fields found inside that form;
    3. ``find_nil`` ignores everything after the form's closing tag.

    Parsing happens in the constructor; read the results from the
    attributes afterwards.

    Attributes:
        action: raw ``action`` attribute of the captcha form (query string
            included), or ``None`` if no matching form was found.
        sitekey: ``data-sitekey`` of the reCAPTCHA ``<div>``, or ``None``.
        svalue: ``data-s`` of the reCAPTCHA ``<div>``, or ``None``.
        inputs: ``{name: value}`` of the ``<input>`` tags inside the form;
            an input without a ``value`` attribute maps to ``""``.
    """

    def __init__(self, html):
        """Parse ``html`` (a str) immediately and populate the attributes."""
        super().__init__()
        self.action = None
        self.sitekey = None
        self.svalue = None
        self.inputs = {}
        # Initial state: look for the opening tag of the captcha form.
        self.handle_starttag = self.find_form
        super().feed(html)

    def find_form(self, tag, attrs):
        """State 1: wait for the captcha form, then start collecting."""
        attrs = dict(attrs)
        # A valueless attribute (``<form action>``) is reported as None by
        # HTMLParser; treat it like a missing one instead of crashing.
        action = attrs.get('action') or ''
        clean_action = action.partition('?')[0]
        if tag == "form" and clean_action in ("/das_captcha", "index"):
            self.action = action
            self.handle_starttag = self.find_values
            self.handle_endtag = self.find_end

    def find_values(self, tag, attrs):
        """State 2: record the reCAPTCHA widget data and form inputs."""
        attrs = dict(attrs)
        is_widget = tag == "div" and (
            attrs.get('id') == "recaptcha"
            or attrs.get('class') == "g-recaptcha"
        )
        if is_widget:
            self.sitekey = attrs.get('data-sitekey')
            self.svalue = attrs.get('data-s')
        if tag == "input" and "name" in attrs:
            # Inputs such as submit buttons may lack a value attribute;
            # that must not abort parsing with a KeyError.
            self.inputs[attrs["name"]] = attrs.get("value", "")

    def find_end(self, tag):
        """Stop collecting once the captcha form is closed."""
        if tag == "form":
            self.handle_starttag = self.find_nil
            self.handle_endtag = self.find_nil

    def find_nil(self, *args):
        """State 3: ignore every remaining tag."""
        pass
46
def submit_captcha(r):
    """Hand a captcha page over to the configured captcha-solving service.

    Args:
        r: the HTTP response that may be a captcha page; its ``text`` and
            ``url`` attributes are read.

    Returns:
        - ``False`` if solving is disabled (no API key configured) or ``r``
          is not a captcha page,
        - ``True`` if a solving task was submitted just now,
        - ``int`` seconds since the last submission if one was already made
          within the last 90 seconds.
          NOTE(review): this can be ``0``, which is falsy -- callers should
          compare with ``is False`` rather than rely on truthiness.

    Raises:
        Whatever ``requests`` raises if the solver API cannot be reached
        (including a timeout); the request row has been stored by then.
    """
    from contextlib import closing

    api_key = cf['captcha']['api_key']
    api_host = cf['captcha']['api_host']
    public_uri = cf['webhooks']['public_uri']

    if not api_key:
        return False # disabled by admin

    if "To continue with your YouTube experience, please fill out the form below." not in r.text:
        return False

    # `with sqlite3.connect(...)` alone only commits/rolls back; closing()
    # makes sure the connection itself is released as well.
    with closing(sqlite3.connect(cf['global']['database'])) as conn:
        c = conn.cursor()

        # check if a captcha was already submitted recently:
        c.execute("""
            SELECT (julianday('now') - julianday(timestamp)) * 86400
            FROM captcha_requests
            WHERE timestamp > datetime('now', '-90 seconds')
            ORDER BY timestamp DESC
            LIMIT 1
        """) # Note: 90sec should work fine for capmonster, might need tweaking.
        result = c.fetchone()
        if result: # already submitted
            (last_ago,) = result
            flask_logger(f"last request submitted {last_ago}s ago")
            return int(last_ago)

        captcha = ExtractCaptcha(r.text)
        nonce = secrets.token_urlsafe(16)

        # note: auto field for current datetime
        c.execute("""
            INSERT INTO captcha_requests(nonce, url, action)
            VALUES (?, ?, ?)
        """, (nonce, r.url, captcha.action))
        # Persist the request before the network call, so the nonce is
        # already known when the solver reports back.
        conn.commit()

    # The database connection is closed at this point; only the (slow)
    # network request remains.
    import requests_cache
    with requests_cache.disabled():
        r2 = requests.post(f"{api_host}/createTask", json={
            "clientKey": api_key,
            "task": {
                "type": "NoCaptchaTaskProxyless",
                "websiteURL": r.url,
                "websiteKey": captcha.sitekey,
                "recaptchaDataSValue": captcha.svalue,
            },
            # the nonce in the callback URL ties the result to our row
            "callbackUrl": f"{public_uri}/captcha_response/v1/{nonce}",
        }, timeout=30)  # never hang forever on an unresponsive solver API
    task_id = r2.json().get("taskId")
    flask_logger(f"submitted captcha task with id {task_id}")

    return True
106
def solve_captcha(nonce, json_obj):
    """Redeem a solved captcha and store the cookie the site hands out.

    Looks up the pending request identified by ``nonce``, posts the
    solver's reCAPTCHA token to the stored form action, saves the
    ``goojf`` cookie from the response and removes the finished (and any
    expired) request rows.

    Args:
        nonce: the nonce stored in ``captcha_requests`` for this task.
        json_obj: dict-like solver payload; ``solution.gRecaptchaResponse``
            is read from it.

    Raises:
        NotFound: if no pending request matches ``nonce``.
    """
    from contextlib import closing

    # `with sqlite3.connect(...)` alone only commits/rolls back; closing()
    # makes sure the connection itself is released as well.
    with closing(sqlite3.connect(cf['global']['database'])) as conn:
        c = conn.cursor()
        c.execute("""
            SELECT url, action
            FROM captcha_requests
            WHERE nonce = ? -- AND timestamp > datetime('now', '-90 seconds')
        """, (nonce,))
        row = c.fetchone()
        if row is None:  # unknown (or already redeemed) nonce
            raise NotFound # todo: ugly
        url, action = row

        # tolerate both a missing and an explicit null "solution"
        solution = json_obj.get("solution") or {}

        import requests_cache
        with requests_cache.disabled():
            r = requests.post(
                urljoin(url, action),
                data={"g-recaptcha-response": solution.get("gRecaptchaResponse")},
                allow_redirects=False,  # only this response's cookies are read
                timeout=30,  # never hang forever on an unresponsive host
            )

        cookie_name = "goojf"
        # NOTE(review): this is None when the response did not set the
        # cookie (e.g. rejected solution); it is stored as-is -- confirm
        # that consumers of captcha_cookies cope with a NULL value.
        cookie_value = r.cookies.get(cookie_name)

        # Commit both statements together, or roll both back on error.
        with conn:
            c.execute("""
                INSERT OR REPLACE INTO captcha_cookies(name, value)
                VALUES (?, ?)
            """, (cookie_name, cookie_value))

            # datetime() (not date()) so the cutoff carries a time of day
            # and same-day stale rows are actually purged, matching the
            # 90-second window used in submit_captcha.
            c.execute("""
                DELETE FROM captcha_requests
                WHERE nonce = ? OR timestamp < datetime('now', '-90 seconds')
            """, (nonce,))
Imprint / Impressum