from datetime import datetime, timezone

import requests


def _client_headers():
    """Return the request headers that identify us as the YouTube web client."""
    # The client version is parsed as a date, and if it's invalid (e.g.
    # month>12 or even feb>=30), youtube throws an encrypted stacktrace.
    # Any date >= 20160323 worked as of 20200802 (even year 3000); we send
    # the current UTC date.
    today = datetime.now(timezone.utc).strftime("%Y%m%d")
    return {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': f'2.{today}.01.01',
    }


def fetch_searchresults(q=None, sp=None):
    """
    Fetch YouTube search results as parsed JSON.

    q is sent as the 'search_query' parameter and sp as the 'sp' parameter
    of the request to https://www.youtube.com/results.

    The request is attempted at most twice. Returns the decoded JSON
    document, or None if a response has a non-OK status code or if no
    attempt produced a parseable JSON body.
    """
    query = {
        'search_query': q,
        'pbj': 1,  # makes youtube return a json-response
        'hl': 'en',  # 'en_US',
        'sp': sp,
    }

    for _ in range(2):
        r = requests.get(
            "https://www.youtube.com/results",
            query,
            headers=_client_headers(),
        )
        if not r.ok:
            return None

        # Sometimes, youtube throws an exception after the response already
        # begun. So the status code is 200, begins with JSON and switches to
        # HTML half way through. (This should be "fixed" by retrying, though.)
        # XXX: 2020-10-21: caught a youtube-response that just stopped mid-way
        # through.
        # response.text == '[\r\n{"page": "search","rootVe": "4724"},\r\n{"page": "search",'
        # Both kinds of broken body are invalid JSON, so a failed parse is the
        # single signal used to trigger a retry; a body must never be rejected
        # before parsing has been attempted.
        try:
            return r.json()
        except ValueError:  # JSON decode errors raised by requests are ValueErrors
            continue

    # Every attempt returned an unparseable body.
    return None


def fetch_ajax(params):
    """
    Fetch data using a continuation protobuf.

    params is sent as the 'continuation' parameter of the request to
    https://www.youtube.com/browse_ajax.

    Returns the decoded JSON document, or None if the response has a
    non-OK status code. A body that is not valid JSON is not handled here:
    the exception raised by the JSON decoder propagates to the caller.
    """
    # TODO: handle auto_generated!
    r = requests.get(
        "https://www.youtube.com/browse_ajax",
        {
            'continuation': params,
            'gl': 'US',
            'hl': 'en',
        },
        headers=_client_headers(),  # same client identification as fetch_searchresults()
    )
    if not r.ok:
        return None

    return r.json()