import re

import requests

from datetime import datetime, timezone

from ..common.common import fetch_xml, parse_xml
from ..common.innertube import G


def _client_version():
    """Build a fake 'x-youtube-client-version' header value from today's date.

    The version is parsed by youtube as a date, and if it's invalid (e.g.
    month>12 or even feb>=30), youtube throws an encrypted stacktrace :D
    (but any random date >= 20160323 as of 20200802 works (even year 3000)).
    """
    today = datetime.now(timezone.utc).strftime("%Y%m%d")
    return f"2.{today}.01.01"


def fetch_searchresults(q=None, sp=None):
    """Fetch youtube search results for query *q* (optional filter *sp*).

    Returns the decoded JSON response, or None if the request failed or the
    response could not be decoded after two attempts.
    """
    for _ in range(2):
        r = requests.get("https://www.youtube.com/results", {
            'search_query': q,
            'pbj': 1,  # makes youtube return a json-response
            'hl': 'en',  # 'en_US'
            'sp': sp,
        }, headers={
            'x-youtube-client-name': '1',
            'x-youtube-client-version': _client_version(),
        })
        if not r.ok:
            return None
        # Sometimes, youtube throws an exception after the response already begun.
        # This can manifest in two ways:
        # 1) So the status code is 200, begins with JSON and switches to HTML half
        #    way through. WTF?! (This should be "fixed" by retrying, though)
        # 2) The response just stopping mid-way through like this: response.text ==
        #    '[\r\n{"page": "search","rootVe": "4724"},\r\n{"page": "search",'
        # hence, just try-catching the decoding step is the easiest way out.
        try:
            return r.json()
        except ValueError:  # requests' JSONDecodeError is a ValueError subclass
            continue
    # will return None once we break out of the loop


def fetch_ajax(params):
    """ fetch data using a continuation protobuf """
    # TODO: handle auto_generated!
    # TODO: this is not cached any more!
    #  -> https://github.com/reclosedev/requests-cache/issues/154
    # Note: this 'innertube' API key exists since at least 2015:
    # https://stackoverflow.com/q/33511165
    r = requests.post("https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={
        'continuation': params,
        'context': {'client': {
            'gl': 'US',
            'hl': 'en',
            'clientName': 'WEB',
            'clientVersion': _client_version(),
        }},
    })
    if not r.ok:
        return None
    return r.json()


def canonicalize_channel(name):
    """Resolve *name* (UCID, legacy /user/ name, or /c/ vanity name) to a UCID.

    Returns the 24-character channel id, or None if it cannot be resolved.
    """
    # already a canonical channel id?
    if re.fullmatch(r"(UC[A-Za-z0-9_-]{22})", name):
        return name

    # try /user/ (legacy URLs):
    xmlfeed = fetch_xml("user", name)
    if xmlfeed:
        _, _, _, channel_id, _ = parse_xml(xmlfeed)
        return channel_id

    # get UCID of /c/ (vanity URLs):
    r = requests.get(f'https://www.youtube.com/c/{name}/about?pbj=1&hl=en_US', headers={
        'x-youtube-client-name': '1',
        'x-youtube-client-version': _client_version(),
    })
    # the rssUrl looks like '...?channel_id=UC...'; anything missing along the
    # way (bad JSON, absent keys, no '=' in the url) means extraction failed.
    try:
        return r.json()[1]['response']['metadata']['channelMetadataRenderer']['rssUrl'].split("=")[1]
    except (ValueError, LookupError, TypeError, AttributeError):
        pass

    # unable to extract:
    return None


def find_and_parse_error(result):
    """Extract a human-readable "Type: message" string from a youtube JSON
    response, or None if the response contains no recognizable error object.
    """
    error_obj = (
        result|G('responseContext')|G('errors')|G('error')|G(0) or
        result|G('alerts')|G(0)|G('alertRenderer') or
        result|G('error')
    )
    if error_obj is None:
        return None

    error_type = error_obj|G('code', 'type', 'status') or 'Error'
    error = (
        error_obj|G('debugInfo', 'externalErrorMessage') or
        error_obj|G('text')|G.text or
        error_obj|G('message') or
        "unknown error"
    )
    return f"{error_type}: {error.rstrip('.')}"