]> git.gir.st - subscriptionfeed.git/blob - app/dangerous/lib.py
useless youtube.com cargoculting
[subscriptionfeed.git] / app / dangerous / lib.py
1 import requests
2 from datetime import datetime, timezone
3
def fetch_searchresults(q=None, page=1, sp=None):
    """
    fetch one page of search results from youtube's /results endpoint.

    q, page and sp are sent as the query parameters of the same name.
    returns the decoded json body, or None if the response status is not ok.
    """
    # the version is parsed as a date, and if it's invalid (e.g. month>12 or
    # even feb>=30), youtube throws an encrypted stacktrace :D (but any random
    # date >= 20160323 as of 20200802 works (even year 3000)
    utc_date = datetime.now(timezone.utc).strftime("%Y%m%d")
    request_headers = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': f'2.{utc_date}.0.0',
    }

    query = {
        # Note: if we use '?search_query=' (as yt.com does), we can't use
        # '&page=', but have to use a continuation url that requires an api key
        'q': q,
        'pbj': 1,  # makes youtube return a json-response
        'hl': 'en',  # 'en_US',
        'page': page,
        'sp': sp,
    }

    response = requests.get(
        "https://www.youtube.com/results",
        query,
        headers=request_headers,
    )
    return response.json() if response.ok else None
20
def fetch_ajax(params):
    """
    fetch data using a continuation protobuf

    params is sent as both the 'ctoken' and the 'continuation' query
    parameter of youtube's /browse_ajax endpoint. the request is attempted up
    to three times: when the response carries an alert of type "ERROR", the
    error is logged, the cached copy of the response is deleted and the
    request is retried.

    returns the decoded json body of the first response that is not flagged
    as an error, or None if that response's status is not ok or if all three
    attempts reported an error.
    """
    # TODO: handle auto_generated!
    today = datetime.now(timezone.utc).strftime("%Y%m%d")

    for attempt in range(3):  # sometimes, this endpoint returns an error and retrying once helps:
        r = requests.get("https://www.youtube.com/browse_ajax", {
            'ctoken': params,  # not necessary, but youtube sends it.
            'continuation': params,
            'gl': 'US',
            'hl': 'en',
        }, headers={
            'x-youtube-client-name': '1',
            'x-youtube-client-version': f'2.{today}.0.0',  # see fetch_searchresults()
        })

        try:  # TODO: cleanup
            alert = r.json()[1]['response']['alerts'][0]['alertRenderer']
            if alert['type'] == "ERROR":
                # imported lazily; only needed on this error path
                from flask import current_app
                current_app.logger.error(f"got error '{alert['text']['simpleText']}', retrying ({attempt})")
                # cache bust: drop the stored error response before retrying
                from requests_cache import get_cache
                cache = get_cache()
                main_key = cache.create_key(r.request)
                cache.delete(main_key)
                continue
        except Exception:
            # best effort: a response without an alert (the normal case), an
            # unexpected json shape, or a failure while logging/cache busting
            # all fall through to the regular handling below. unlike a bare
            # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
            pass

        if not r.ok:
            return None

        return r.json()

    # every attempt was answered with an error alert
    return None
Imprint / Impressum