]>
git.gir.st - subscriptionfeed.git/blob - app/common/anticaptcha.py
5 from urllib
.parse
import urljoin
6 from html
.parser
import HTMLParser
8 from werkzeug
.exceptions
import NotFound
10 from ..common
.common
import cf
, flask_logger
class ExtractCaptcha(HTMLParser):
    """Collect the fields of the captcha form found in an HTML page.

    The parser works as a small state machine that swaps its own
    ``handle_starttag`` / ``handle_endtag`` callbacks:

    1. ``find_form``   - skip everything until the captcha ``<form>`` opens.
    2. ``find_values`` - inside that form, record the reCAPTCHA parameters
       and every named ``<input>``.
    3. ``find_nil``    - after ``</form>``, ignore the rest of the document.

    Attributes available after construction:
        action:  the form's raw ``action`` attribute (query string included),
                 or ``None`` when no captcha form was found.
        sitekey: ``data-sitekey`` of the reCAPTCHA ``<div>``, or ``None``.
        svalue:  ``data-s`` of the reCAPTCHA ``<div>``, or ``None``.
        inputs:  ``{name: value}`` for each named ``<input>`` in the form.
    """

    def __init__(self, html):
        """Parse *html* immediately; results are stored on the instance."""
        super().__init__()
        self.action = None
        self.sitekey = None
        self.svalue = None
        self.inputs = {}
        # Instance attributes shadow the HTMLParser methods of the same name,
        # which is how the parser switches state while feeding.
        self.handle_starttag = self.find_form
        self.feed(html)

    def find_form(self, tag, attrs):
        """Start-tag handler used until the captcha form has been seen."""
        # HTMLParser hands over a list of (name, value) pairs.
        attrs = dict(attrs)
        # A bare ``action`` attribute has the value None; treat it as empty.
        clean_action = (attrs.get('action') or '').partition('?')[0]
        if tag == "form" and clean_action in ["/das_captcha", "index"]:
            self.action = attrs["action"]
            self.handle_starttag = self.find_values
            self.handle_endtag = self.find_end

    def find_values(self, tag, attrs):
        """Start-tag handler used while inside the captcha form."""
        attrs = dict(attrs)
        if tag == "div" and (
            attrs.get('id') == "recaptcha"
            or attrs.get('class') == "g-recaptcha"
        ):
            self.sitekey = attrs.get('data-sitekey')
            self.svalue = attrs.get('data-s')
        if tag == "input" and "name" in attrs:
            # Inputs without a value attribute (e.g. a submit button) are
            # recorded as empty strings instead of raising KeyError.
            self.inputs[attrs["name"]] = attrs.get("value") or ""

    def find_end(self, tag):
        """End-tag handler used while inside the captcha form."""
        # Only </form> ends collection. Other end tags -- including the
        # implicit one HTMLParser emits for self-closing <input/> -- must not
        # stop the parser before the remaining inputs are seen.
        if tag == "form":
            self.handle_starttag = self.find_nil
            self.handle_endtag = self.find_nil

    def find_nil(self, *args):
        """No-op handler installed once the form has been fully consumed."""
        pass
42 def submit_captcha(r
):
44 # - False if not rate limited or disabled by user
45 # - True if just now submitted.
46 # - int(seconds since last request) if already submitted
48 api_key
= cf
['captcha']['api_key']
49 api_host
= cf
['captcha']['api_host']
50 public_uri
= cf
['webhooks']['public_uri']
53 return False # disabled by admin
55 if "To continue with your YouTube experience, please fill out the form below." not in r
.text
:
58 with sqlite3
.connect(cf
['global']['database']) as conn
:
61 # check if a captcha was already submitted recently:
63 SELECT (julianday('now') - julianday(timestamp)) * 86400
65 WHERE timestamp > datetime('now', '-90 seconds')
66 ORDER BY timestamp DESC
68 """) # Note: 90sec should work fine for capmonster, might need tweaking.
70 if result
: # already submitted
74 captcha
= ExtractCaptcha(r
.text
)
75 nonce
= secrets
.token_urlsafe(16)
76 inputs
= json
.dumps(captcha
.inputs
)
77 #^: {"action_recaptcha_verify2": "1", "next": "/watch?v=***&hl=en&gl=US"}
79 # note: auto field for current datetime
81 INSERT INTO captcha_requests(nonce, url, action, inputs)
83 """, (nonce
, r
.url
, captcha
.action
, inputs
))
86 r2
= requests
.post(f
"{api_host}/createTask", json
={
89 "type": "NoCaptchaTaskProxyless",
91 "websiteKey": captcha
.sitekey
,
92 "recaptchaDataSValue": captcha
.svalue
,
94 "callbackUrl": f
"{public_uri}/captcha_response/v1/{nonce}",
96 task_id
= r2
.json().get("taskId")
97 flask_logger(f
"submitted captcha task with id {task_id}", "info")
101 def solve_captcha(nonce
, json_obj
):
102 with sqlite3
.connect(cf
['global']['database']) as conn
:
105 SELECT url, action, inputs
106 FROM captcha_requests
107 WHERE nonce = ? -- AND timestamp > date('now', '-90 seconds')
110 url
, action
, inputs
= c
.fetchone()
111 inputs
= json
.loads(inputs
)
113 raise NotFound
# todo: ugly
115 solution
= json_obj
.get("solution", {})
116 inputs
["g-recaptcha-response"] = solution
.get("gRecaptchaResponse")
117 cookies
= solution
.get("cookies")
120 urljoin(url
, action
),
121 cookies
=cookies
, data
=inputs
,
122 allow_redirects
=False
125 captcha_cookies
= r
.cookies
126 #cargo-culted from invidious, but i don't believe it's necessary
127 # if enabled, use r.post(allow_redirects=False)!
128 #if action == "/sorry/index":
129 # from urllib.parse import parse_qs
130 # captcha_cookies, _, _ = parse_qs(r.headers["Location"]) \
131 # .get("google_abuse", "") \
133 # xxx: returns cookie header-value; parse to dict
135 c
.execute("DELETE FROM captcha_cookies")
136 # not using insert-or-replace-into to avoid keeping removed cookies
138 INSERT INTO captcha_cookies(name, value)
140 """, captcha_cookies
.items())
142 DELETE FROM captcha_requests
143 WHERE nonce = ? OR timestamp < date('now', '-1 minute')