import logging
from datetime import datetime, timezone

import requests


def _client_headers():
    """Build the client-identification headers sent with every request."""
    today = datetime.now(timezone.utc).strftime("%Y%m%d")
    return {
        'x-youtube-client-name': '1',
        # The version is parsed as a date, and if it's invalid (e.g. month>12
        # or even feb>=30), youtube throws an encrypted stacktrace. Any random
        # date >= 20160323 (as of 20200802) works, even year 3000.
        'x-youtube-client-version': f'2.{today}.0.0',
    }


def _error_alert_text(data):
    """Return the message of a leading ERROR alert in *data*, else None.

    Any payload that does not have the expected nesting (missing key, short
    list, wrong container type) is treated as "no error alert".
    """
    try:
        alert = data[1]['response']['alerts'][0]['alertRenderer']
        if alert['type'] != "ERROR":
            return None
        return alert['text']['simpleText']
    except (KeyError, IndexError, TypeError):
        return None


def _log_error(msg, *args):
    """Log to the Flask app logger, or to a stdlib logger if unavailable."""
    try:
        from flask import current_app
        current_app.logger.error(msg, *args)
    except (ImportError, RuntimeError):
        # Flask missing, or no application context is active.
        logging.getLogger(__name__).error(msg, *args)


def _bust_cache(response):
    """Best-effort removal of *response* from the installed requests_cache."""
    try:
        from requests_cache import get_cache
    except ImportError:
        return  # no caching layer available, nothing to evict
    cache = get_cache()
    if cache is None:  # requests_cache is importable but not installed
        return
    cache.delete(cache.create_key(response.request))


def fetch_searchresults(q=None, page=1, sp=None):
    """Fetch one page of search results as parsed JSON.

    Args:
        q: the search query.
        page: result page number to request.
        sp: value passed through as the 'sp' query parameter.

    Returns:
        The decoded JSON response, or None if the HTTP status is not ok.
    """
    r = requests.get("https://www.youtube.com/results", params={
        # Note: if we use '?search_query=' (as yt.com does), we can't use
        # '&page=', but have to use a continuation url that requires an
        # api key.
        'q': q,
        'pbj': 1,  # makes youtube return a json-response
        'hl': 'en',  # 'en_US',
        'page': page,
        'sp': sp,
    }, headers=_client_headers())

    if not r.ok:
        return None

    return r.json()


def fetch_ajax(params):
    """Fetch data using a continuation protobuf.

    The request is attempted up to three times: sometimes this endpoint
    returns an error alert and retrying helps. Before each retry the failed
    response is evicted from the request cache (when one is installed) so the
    retry is not answered with the same cached error.

    Args:
        params: the continuation token, sent as the 'continuation' parameter.

    Returns:
        The decoded JSON response; None if the HTTP status is not ok, or if
        every attempt came back with an error alert.

    Raises:
        ValueError: if a response with an ok status does not contain JSON.
    """
    # TODO: handle auto_generated!
    for attempt in range(3):
        r = requests.get("https://www.youtube.com/browse_ajax", params={
            'continuation': params,
            'gl': 'US',
            'hl': 'en',
        }, headers=_client_headers())

        try:
            data = r.json()  # parse once; reused for the alert check and return
        except ValueError:
            if not r.ok:
                return None
            raise

        error_text = _error_alert_text(data)
        if error_text is not None:
            _log_error("got error '%s', retrying (%s)", error_text, attempt)
            _bust_cache(r)
            continue

        if not r.ok:
            return None

        return data

    # All attempts returned an error alert.
    return None