From d60509cc71b5ded1de82cd805df1816aae3cfce7 Mon Sep 17 00:00:00 2001
From: girst
Date: Sun, 6 Dec 2020 18:46:19 +0100
Subject: [PATCH] fix canonicalisation of legacy usernames

---
Review notes (text in this area is not part of the commit message):

canonicalize_channel(name) resolves a channel in this order after the change:
  1. A name that fully matches "UC" followed by 22 characters of
     [A-Za-z0-9_-] is returned unchanged.
  2. Otherwise fetch_xml("user", name) is tried (legacy /user/ URLs); a truthy
     feed is handed to parse_xml() and the fourth field of its result is
     returned.
  3. Otherwise the /c/{name}/about page is requested (vanity URLs) and
     rssUrl.split("=")[1] from the channel metadata is returned.
  Any exception raised while extracting rssUrl in step 3 is swallowed and
  None is returned.

NOTE(review): the leading whitespace of the hunk lines below is assumed to be
4-space Python indentation; confirm against app/browse/lib.py before applying.

 app/browse/lib.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/app/browse/lib.py b/app/browse/lib.py
index 1b785e9..798c742 100644
--- a/app/browse/lib.py
+++ b/app/browse/lib.py
@@ -1,6 +1,9 @@
+import re
 import requests
 from datetime import datetime, timezone
 
+from ..common.common import fetch_xml, parse_xml
+
 def fetch_searchresults(q=None, sp=None):
     for _ in range(2):
         today = datetime.now(timezone.utc).strftime("%Y%m%d")
@@ -50,13 +53,25 @@ def fetch_ajax(params):
     return r.json()
 
 def canonicalize_channel(name):
+    if re.fullmatch(r"(UC[A-Za-z0-9_-]{22})", name):
+        return name
+
+    # try /user/ (legacy URLs):
+    xmlfeed = fetch_xml("user", name)
+    if xmlfeed:
+        _, _, _, channel_id, _ = parse_xml(xmlfeed)
+        return channel_id
+
+    # get UCID of /c/ (vanity URLs):
     today = datetime.now(timezone.utc).strftime("%Y%m%d")
     r = requests.get(f'https://www.youtube.com/c/{name}/about?pbj=1&hl=en_US', headers={
         'x-youtube-client-name': '1',
         'x-youtube-client-version': f'2.{today}.01.01', # see fetch_searchresults()
     })
-
     try:
         return r.json()[1]['response']['metadata']['channelMetadataRenderer']['rssUrl'].split("=")[1]
     except:
-        return None
+        pass
+
+    # unable to extract:
+    return None
-- 
2.39.3