app/browse/lib.py

   1 import requests
   2 from datetime import datetime, timezone
   3
   4 def fetch_searchresults(q=None, sp=None):
   5   for _ in range(2):
   6     today = datetime.now(timezone.utc).strftime("%Y%m%d")
   7     r = requests.get(f"https://www.youtube.com/results", {
   8         'search_query': q,
   9         'pbj': 1, # makes youtube return a json-response
  10         'hl': 'en', #'en_US',
  11         'sp': sp,
  12     }, headers={
  13         'x-youtube-client-name': '1',
  14         'x-youtube-client-version': f'2.{today}.01.01', # the version is parsed as a date, and if it's invalid (e.g. month>12 or even feb>=30), youtube throws an encrypted stacktrace :D (but any random date >= 20160323 as of 20200802 works (even year 3000)
  15     })
  16     if not r.ok:
  17         return None
  18
  19     # Sometimes, youtube throws an exception after the response already begun.
  20     # This can manifest in two ways:
  21     # 1) So the status code is 200, begins with JSON and switches to HTML half
  22     #    way through. WTF?! (This should be "fixed" by retrying, though)
  23     # 2) The response just stopping mid-way through like this: response.text ==
  24     #    '[\r\n{"page": "search","rootVe": "4724"},\r\n{"page": "search",'
  25     # hence, just try-catching the decoding step is the easiest way out.
  26     try:
  27         return r.json()
  28     except:
  29         continue # will return None once we break out of the loop
  30
  31 def fetch_ajax(params):
  32     """
  33     fetch data using a continuation protobuf
  34     """
  35     # TODO: handle auto_generated!
  36     today = datetime.now(timezone.utc).strftime("%Y%m%d")
  37
  38     r = requests.get(f"https://www.youtube.com/browse_ajax", {
  39         'continuation': params,
  40         'gl': 'US',
  41         'hl': 'en',
  42     }, headers={
  43         'x-youtube-client-name': '1',
  44         'x-youtube-client-version': f'2.{today}.01.01', # see fetch_searchresults()
  45     })
  46
  47     if not r.ok:
  48         return None
  49
  50     return r.json()