app/browse/lib.py

   1 import re
   2 import requests
   3 from datetime import datetime, timezone
   4
   5 from ..common.common import fetch_xml, parse_xml
   6 from ..common.innertube import G
   7
   8 def fetch_searchresults(q=None, sp=None):
   9   for _ in range(2):
  10     today = datetime.now(timezone.utc).strftime("%Y%m%d")
  11     r = requests.get(f"https://www.youtube.com/results", {
  12         'search_query': q,
  13         'pbj': 1, # makes youtube return a json-response
  14         'hl': 'en', #'en_US',
  15         'sp': sp,
  16     }, headers={
  17         'x-youtube-client-name': '1',
  18         'x-youtube-client-version': f'2.{today}.01.01', # the version is parsed as a date, and if it's invalid (e.g. month>12 or even feb>=30), youtube throws an encrypted stacktrace :D (but any random date >= 20160323 as of 20200802 works (even year 3000)
  19     })
  20     if not r.ok:
  21         return None
  22
  23     # Sometimes, youtube throws an exception after the response already begun.
  24     # This can manifest in two ways:
  25     # 1) So the status code is 200, begins with JSON and switches to HTML half
  26     #    way through. WTF?! (This should be "fixed" by retrying, though)
  27     # 2) The response just stopping mid-way through like this: response.text ==
  28     #    '[\r\n{"page": "search","rootVe": "4724"},\r\n{"page": "search",'
  29     # hence, just try-catching the decoding step is the easiest way out.
  30     try:
  31         return r.json()
  32     except:
  33         continue # will return None once we break out of the loop
  34
  35 def fetch_ajax(params):
  36     """
  37     fetch data using a continuation protobuf
  38     """
  39     # TODO: handle auto_generated!
  40     today = datetime.now(timezone.utc).strftime("%Y%m%d")
  41
  42     # TODO: this is not cached any more! -> https://github.com/reclosedev/requests-cache/issues/154
  43     # Note: this 'innertube' API key exists since at least 2015: https://stackoverflow.com/q/33511165
  44     r = requests.post(f"https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={
  45         'continuation': params,
  46         'context': {'client': {
  47             'gl': 'US',
  48             'hl': 'en',
  49             'clientName': 'WEB',
  50             'clientVersion': f'2.{today}.01.01',
  51         }},
  52     })
  53
  54     if not r.ok:
  55         return None
  56
  57     return r.json()
  58
  59 def canonicalize_channel(name):
  60     if re.fullmatch(r"(UC[A-Za-z0-9_-]{22})", name):
  61         return name
  62
  63     # try /user/ (legacy URLs):
  64     xmlfeed = fetch_xml("user", name)
  65     if xmlfeed:
  66         _, _, _, channel_id, _ = parse_xml(xmlfeed)
  67         return channel_id
  68
  69     # get UCID of /c/ (vanity URLs):
  70     today = datetime.now(timezone.utc).strftime("%Y%m%d")
  71     r = requests.get(f'https://www.youtube.com/c/{name}/about?pbj=1&hl=en_US', headers={
  72         'x-youtube-client-name': '1',
  73         'x-youtube-client-version': f'2.{today}.01.01', # see fetch_searchresults()
  74     })
  75     try:
  76         return r.json()[1]['response']['metadata']['channelMetadataRenderer']['rssUrl'].split("=")[1]
  77     except:
  78         pass
  79
  80     # unable to extract:
  81     return None
  82
  83 def find_and_parse_error(result):
  84     error_obj = (
  85         result|G('responseContext')|G('errors')|G('error')|G(0)
  86         or result|G('alerts')|G(0)|G('alertRenderer')
  87         or result|G('error')
  88     )
  89     if error_obj is None:
  90         return None
  91
  92     error_type = error_obj|G('code', 'type', 'status') or 'Error'
  93     error = (
  94         error_obj|G('debugInfo', 'externalErrorMessage')
  95         or error_obj|G('text')|G.text
  96         or error_obj|G('message')
  97         or "unknown error"
  98     )
  99     return f"{error_type}: {error.rstrip('.')}"