From 6d74ea6802998f0db0075b1778f830d49be019e8 Mon Sep 17 00:00:00 2001 From: girst Date: Sat, 29 Apr 2023 12:44:11 +0000 Subject: [PATCH] use resolve_url endpoint for channel canonicalisation one less separate place we call into youtube's frontend apis we now support: - /channel/ucid - /c/vanity - /user/username - /@handle - /brandname according to https://support.google.com/youtube/answer/6180214?hl=en vanity urls and usernames are legacy, brand urls aren't documented at all. --- app/browse/__init__.py | 15 ++++++++------- app/browse/lib.py | 26 ++++++++------------------ 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/app/browse/__init__.py b/app/browse/__init__.py index 6a6b1aa..4673dd6 100644 --- a/app/browse/__init__.py +++ b/app/browse/__init__.py @@ -118,6 +118,7 @@ def channel(channel_id, subpage="videos"): is_subscribed=is_subscribed, continuation=continuation) +@frontend.route('//') @frontend.route('/user//') @frontend.route('/user//') @frontend.route('/c//') @@ -127,15 +128,17 @@ def channel_redirect(user, subpage=None): The browse_ajax 'API' needs the UCID. 
""" - typ = request.path.split("/")[1] # 'c' or 'user' - # inverse of the test in /channel/: if re.match(r"(UC[A-Za-z0-9_-]{22})", user): return redirect(url_for('.channel', channel_id=user)) - channel_id = canonicalize_channel(user, typ) + if subpage not in (None, "home", "videos", "shorts", "streams", "playlists", "community", "channels", "about"): + raise NotFound("not a valid channel subpage") + + channel_id = canonicalize_channel(request.path) if not channel_id: - raise NotFound("channel appears to not exist") + raise NotFound("channel does not exist") + return redirect( url_for('.channel', channel_id=channel_id, subpage=subpage), 308 ) @@ -177,9 +180,7 @@ def plain_user_or_video(something): # prevent a lot of false-positives (and reduce youtube api calls) raise NotFound - # possible channel names: need to distinguish /name from /@name - typ = "c" if something[0] != "@" else "" - channel_id = canonicalize_channel(something, typ) + channel_id = canonicalize_channel(something) # /vanity or /@handle if channel_id: return redirect(url_for('.channel', channel_id=channel_id)) elif re.match(r"^[-_0-9A-Za-z]{11}$", something): # looks like a video id diff --git a/app/browse/lib.py b/app/browse/lib.py index 5d46b05..a4becf7 100644 --- a/app/browse/lib.py +++ b/app/browse/lib.py @@ -12,7 +12,7 @@ def fetch_ajax(endpoint, **kwargs): today = datetime.now(timezone.utc).strftime("%Y%m%d") # TODO: this is not cached any more! -> https://github.com/reclosedev/requests-cache/issues/154 - # Note: this 'innertube' API key exists since at least 2015: https://stackoverflow.com/q/33511165 + # TODO: replace host with youtubei.googleapis.com (used by android)? 
r = requests.post(f"https://www.youtube.com/youtubei/v1/{endpoint}?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={ **kwargs, 'context': {'client': { @@ -28,24 +28,14 @@ def fetch_ajax(endpoint, **kwargs): return r.json() -def canonicalize_channel(name, typ="c"): - if re.fullmatch(r"(UC[A-Za-z0-9_-]{22})", name): - return name +def canonicalize_channel(path): + if re.fullmatch(r"(UC[A-Za-z0-9_-]{22})", path): + return path - # get UCID of /c/ (vanity URLs): - today = datetime.now(timezone.utc).strftime("%Y%m%d") - typ += "/" if typ != "@" else "" - r = requests.get(f'https://www.youtube.com/{typ}{name}/about?pbj=1&hl=en_US', headers={ - 'x-youtube-client-name': '1', - 'x-youtube-client-version': f'2.{today}.01.01', # see fetch_searchresults() - }) - try: - return r.json()[1]['response']['metadata']['channelMetadataRenderer']['rssUrl'].split("=")[1] - except: - pass - - # unable to extract: - return None + # Note: for /watch, append query string, then return .endpoint.watchEndpoint.videoId + resolved = fetch_ajax("navigation/resolve_url", url=f"https://www.youtube.com/{path}") + channel_id = resolved.get('endpoint',{}).get('browseEndpoint',{}).get('browseId') + return channel_id def find_and_parse_error(result): error_obj = ( -- 2.39.3