]>
git.gir.st - subscriptionfeed.git/blob - app/common/anticaptcha.py
4 from urllib
.parse
import urljoin
5 from html
.parser
import HTMLParser
7 from werkzeug
.exceptions
import NotFound
9 from ..common
.common
import cf
, flask_logger
11 # NOTE: We are taking a few shortcuts here, that might bite us later:
12 # 1. we are not sending form data or request cookies
13 # 2. we are only storing the 'goojf' cookie
14 # 3. invidious has some extra cookie code for /sorry/index
15 # 4. we are expecting a response within 90 seconds (max is 5min)
17 class ExtractCaptcha(HTMLParser
):
18 def __init__(self
, html
):
24 self
.handle_starttag
= self
.find_form
26 def find_form(self
, tag
, attrs
):
28 clean_action
= attrs
.get('action','').partition('?')[0]
29 if tag
== "form" and clean_action
in ["/das_captcha", "index"]:
30 self
.action
= attrs
["action"]
31 self
.handle_starttag
= self
.find_values
32 self
.handle_endtag
= self
.find_end
33 def find_values(self
, tag
, attrs
):
35 if tag
== "div" and attrs
.get('id') == "recaptcha" or \
36 tag
== "div" and attrs
.get('class') == "g-recaptcha":
37 self
.sitekey
= attrs
.get('data-sitekey')
38 self
.svalue
= attrs
.get('data-s')
39 if tag
== "input" and "name" in attrs
:
40 self
.inputs
[attrs
["name"]] = attrs
["value"]
41 def find_end(self
, tag
):
43 self
.handle_starttag
= self
.find_nil
44 self
.handle_endtag
= self
.find_nil
def find_nil(self, *args):
    """No-op parser callback: accept any arguments and ignore them.

    ``find_end`` installs this as both ``handle_starttag`` and
    ``handle_endtag``.
    """
    return None
47 def submit_captcha(r
):
49 # - False if not rate limited or disabled by user
50 # - True if just now submitted.
51 # - int(seconds since last request) if already submitted
53 api_key
= cf
['captcha']['api_key']
54 api_host
= cf
['captcha']['api_host']
55 public_uri
= cf
['webhooks']['public_uri']
58 return False # disabled by admin
60 if "To continue with your YouTube experience, please fill out the form below." not in r
.text
:
63 with sqlite3
.connect(cf
['global']['database']) as conn
:
66 # check if a captcha was already submitted recently:
68 SELECT (julianday('now') - julianday(timestamp)) * 86400
70 WHERE timestamp > datetime('now', '-90 seconds')
71 ORDER BY timestamp DESC
73 """) # Note: 90sec should work fine for capmonster, might need tweaking.
75 if result
: # already submitted
77 flask_logger(f
"last request submitted {last_ago}s ago")
80 captcha
= ExtractCaptcha(r
.text
)
81 nonce
= secrets
.token_urlsafe(16)
83 # note: auto field for current datetime
85 INSERT INTO captcha_requests(nonce, url, action)
87 """, (nonce
, r
.url
, captcha
.action
))
91 with requests_cache
.disabled():
92 r2
= requests
.post(f
"{api_host}/createTask", json
={
95 "type": "NoCaptchaTaskProxyless",
97 "websiteKey": captcha
.sitekey
,
98 "recaptchaDataSValue": captcha
.svalue
,
100 "callbackUrl": f
"{public_uri}/captcha_response/v1/{nonce}",
102 task_id
= r2
.json().get("taskId")
103 flask_logger(f
"submitted captcha task with id {task_id}")
107 def solve_captcha(nonce
, json_obj
):
108 with sqlite3
.connect(cf
['global']['database']) as conn
:
112 FROM captcha_requests
113 WHERE nonce = ? -- AND timestamp > date('now', '-90 seconds')
116 url
, action
= c
.fetchone()
118 raise NotFound
# todo: ugly
120 solution
= json_obj
.get("solution", {})
122 import requests_cache
123 with requests_cache
.disabled():
125 urljoin(url
, action
),
126 data
={"g-recaptcha-response": solution
.get("gRecaptchaResponse")},
127 allow_redirects
=False
130 cookie_name
= "goojf"
131 cookie_value
= r
.cookies
.get(cookie_name
)
133 INSERT OR REPLACE INTO captcha_cookies(name, value)
135 """, (cookie_name
, cookie_value
))
138 DELETE FROM captcha_requests
139 WHERE nonce = ? OR timestamp < date('now', '-90 seconds')