]> git.gir.st - subscriptionfeed.git/blob - app/browse/lib.py
fix JSONDecodeError on search for real this time
[subscriptionfeed.git] / app / browse / lib.py
1 import requests
2 from datetime import datetime, timezone
3
def fetch_searchresults(q=None, page=1, sp=None):
    """
    fetch one page of youtube search results as parsed JSON.

    q, page and sp are passed through unchanged as the 'q', 'page' and 'sp'
    query parameters of https://www.youtube.com/results.

    returns the decoded JSON response, or None if youtube answered with an
    error status or if both attempts produced a body that is not valid JSON.
    """
    # Identical for every attempt, so build these once outside the retry loop.
    today = datetime.now(timezone.utc).strftime("%Y%m%d")
    params = {
        'q': q, # Note: if we use '?search_query=' (as yt.com does), we can't use '&page=', but have to use a continuation url that requires an api key
        'pbj': 1, # makes youtube return a json-response
        'hl': 'en', #'en_US',
        'page': page,
        'sp': sp,
    }
    headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': f'2.{today}.01.01', # the version is parsed as a date, and if it's invalid (e.g. month>12 or even feb>=30), youtube throws an encrypted stacktrace :D (but any random date >= 20160323 as of 20200802 works (even year 3000)
    }

    for _ in range(2):
        r = requests.get("https://www.youtube.com/results", params, headers=headers)
        if not r.ok:
            return None

        # Sometimes, youtube throws an exception after the response already begun.
        # So the status code is 200, begins with JSON and switches to HTML half way
        # through. WTF?! (This should be "fixed" by retrying, though)
        if r.text.endswith("</html>"):
            continue # cheap check for the known failure mode; retry

        try:
            return r.json()
        except ValueError:
            # The body was not valid JSON even though it did not end in
            # "</html>" (e.g. trailing whitespace after the html). The JSON
            # decode errors raised by requests are ValueError subclasses, so
            # treat this like the case above and retry instead of crashing.
            continue

    # Both attempts returned an unparsable body.
    return None
27
def fetch_ajax(params):
    """
    request https://www.youtube.com/browse_ajax with `params` as the
    'continuation' value (a continuation protobuf).

    returns the decoded JSON response, or None if the response status
    indicates an error.
    """
    # TODO: handle auto_generated!
    datestamp = datetime.now(timezone.utc).strftime("%Y%m%d")

    query = {
        'continuation': params,
        'gl': 'US',
        'hl': 'en',
    }
    request_headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': f'2.{datestamp}.01.01', # see fetch_searchresults()
    }

    response = requests.get(
        "https://www.youtube.com/browse_ajax",
        params=query,
        headers=request_headers,
    )

    return response.json() if response.ok else None
Imprint / Impressum