]> git.gir.st - subscriptionfeed.git/blob - app/browse/lib.py
clean up broken-youtube-response handling
[subscriptionfeed.git] / app / browse / lib.py
1 import requests
2 from datetime import datetime, timezone
3
def fetch_searchresults(q=None, sp=None):
    """
    fetch a search result page from youtube as decoded JSON

    q is sent as the 'search_query' parameter and sp as the 'sp' parameter
    of the request to https://www.youtube.com/results.

    Returns the decoded JSON response, or None if youtube answered with a
    non-ok status code or if the body could not be decoded as JSON on
    either of the two attempts.
    """
    # the date only feeds the client-version header, so compute it once
    today = datetime.now(timezone.utc).strftime("%Y%m%d")

    for _ in range(2):
        r = requests.get("https://www.youtube.com/results", {
            'search_query': q,
            'pbj': 1,  # makes youtube return a json-response
            'hl': 'en',  # 'en_US',
            'sp': sp,
        }, headers={
            'x-youtube-client-name': '1',
            # the version is parsed as a date, and if it's invalid (e.g.
            # month>12 or even feb>=30), youtube throws an encrypted
            # stacktrace :D (but any random date >= 20160323 as of 20200802
            # works (even year 3000))
            'x-youtube-client-version': f'2.{today}.01.01',
        })
        if not r.ok:
            return None

        # Sometimes, youtube throws an exception after the response already
        # begun. This can manifest in two ways:
        # 1) The status code is 200, the body begins with JSON and switches
        #    to HTML half way through. WTF?! (This should be "fixed" by
        #    retrying, though)
        # 2) The response just stops mid-way through like this:
        #    response.text ==
        #    '[\r\n{"page": "search","rootVe": "4724"},\r\n{"page": "search",'
        # hence, just try-catching the decoding step is the easiest way out.
        try:
            return r.json()
        except ValueError:
            # JSON and unicode decode errors are ValueError subclasses; a
            # bare except would also swallow KeyboardInterrupt/SystemExit.
            continue

    # both attempts yielded an undecodable body
    return None
30
def fetch_ajax(params):
    """
    fetch data using a continuation protobuf

    params is sent as the 'continuation' parameter of the request to
    https://www.youtube.com/browse_ajax.

    Returns the decoded JSON response, or None if youtube answered with a
    non-ok status code or with a body that cannot be decoded as JSON.
    """
    # TODO: handle auto_generated!
    today = datetime.now(timezone.utc).strftime("%Y%m%d")

    r = requests.get("https://www.youtube.com/browse_ajax", {
        'continuation': params,
        'gl': 'US',
        'hl': 'en',
    }, headers={
        'x-youtube-client-name': '1',
        'x-youtube-client-version': f'2.{today}.01.01',  # see fetch_searchresults()
    })

    if not r.ok:
        return None

    # youtube occasionally sends truncated or half-HTML bodies with a 200
    # status (see fetch_searchresults()); report those like any other failed
    # fetch instead of letting the decode error escape to the caller.
    try:
        return r.json()
    except ValueError:
        return None
Imprint / Impressum