From 435bc523950eaeb0ef04c96d3fe36cde7f900d70 Mon Sep 17 00:00:00 2001 From: girst Date: Thu, 17 Jun 2021 14:22:10 +0200 Subject: [PATCH] Revert "Revert "remove useless for loop, implement age-gate bypass"" This reverts commit 80ff9d8a9335559ff59ec1bc0f86733bf583ec00. --- app/common/common.py | 149 +++++++++++++++++++++++-------------------- 1 file changed, 80 insertions(+), 69 deletions(-) diff --git a/app/common/common.py b/app/common/common.py index 99ee331..acf3c5c 100644 --- a/app/common/common.py +++ b/app/common/common.py @@ -211,75 +211,86 @@ def get_video_info(video_id, sts=0, algo=""): c = conn.cursor() c.execute("SELECT * FROM captcha_cookies") cookies = dict(c.fetchall()) - for el in ['WEB', 'WEB_EMBEDDED_PLAYER']: # sometimes, only one or the other works - today = datetime.now(timezone.utc).strftime("%Y%m%d") - # XXX: anticaptcha hasn't been adapted - # XXX: this is not cached any more! - # XXX: age-gated now broken: HtVdAasjOgU (embed ok), XgnwCQzjau8 (no embed) - r = requests.post("https://www.youtube-nocookie.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={ - 'videoId': video_id, - 'context': { - 'client': { - 'gl': 'US', - 'hl': 'en', - 'clientName': el, - 'clientVersion': f'2.{today}.01.01', - } - }, - 'playbackContext': {'contentPlaybackContext': {'signatureTimestamp': sts}} - }, cookies=cookies) - - if r.status_code == 429: - return None, None, None, 'banned', 'possible IP ban' - - metadata = r.json() - playabilityStatus = metadata['playabilityStatus']['status'] - if playabilityStatus != "OK": - playabilityReason = metadata['playabilityStatus'].get('reason', - '//'.join(metadata['playabilityStatus'].get('messages',[]))) - player_error = f"{playabilityStatus}: {playabilityReason}" - if playabilityStatus == "UNPLAYABLE": - continue # try again with next el value (or fail as exhausted) - # without videoDetails, there's only the error message - maybe_metadata = metadata if 'videoDetails' in metadata else None - return None, None, maybe_metadata, 'player', player_error - - # livestreams have no adaptive/muxed formats: - is_live = metadata['videoDetails'].get('isLive', False) - - if not 'formats' in metadata['streamingData'] and not is_live: - continue # no urls - - formats = metadata['streamingData'].get('formats',[]) - for (i,v) in enumerate(formats): - if not ('cipher' in v or 'signatureCipher' in v): continue - cipher = parse_qs(v.get('cipher') or v.get('signatureCipher')) - formats[i]['url'] = unscramble(cipher, algo) - - adaptive = metadata['streamingData'].get('adaptiveFormats',[]) - for (i,v) in enumerate(adaptive): - if not ('cipher' in v or 'signatureCipher' in v): continue - cipher = parse_qs(v.get('cipher') or v.get('signatureCipher')) - adaptive[i]['url'] = unscramble(cipher, algo) - - stream_map = { - 'adaptive': adaptive, 'muxed': formats, - 'hlsManifestUrl': metadata['streamingData'].get('hlsManifestUrl'), - } - - url = sorted(formats, key=lambda k: k['height'], reverse=True)[0]['url'] \ - if not is_live else None - - # ip-locked videos can be recovered if the proxy module is loaded: - is_geolocked = 'gcr' in parse_qs(urlparse(url).query) - - nonfatal = 'livestream' if is_live \ - else 'geolocked' if is_geolocked \ - else None - - return url, stream_map, metadata, nonfatal, None - else: - return None, None, metadata, 'exhausted', player_error + today = datetime.now(timezone.utc).strftime("%Y%m%d") + # XXX: anticaptcha hasn't been adapted + # XXX: this is not cached any more! + # XXX: age-gated now broken: HtVdAasjOgU (embed ok), XgnwCQzjau8 (no embed) + r = requests.post("https://www.youtube-nocookie.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={ + 'videoId': video_id, + 'context': { + 'client': { + 'gl': 'US', + 'hl': 'en', + 'clientName': 'WEB', + 'clientVersion': f'2.{today}.01.01', + } + }, + 'playbackContext': {'contentPlaybackContext': {'signatureTimestamp': sts}} + }, cookies=cookies) + + if r.status_code == 429: + return None, None, None, 'banned', 'possible IP ban' + + metadata = r.json() + playabilityStatus = metadata['playabilityStatus']['status'] + if playabilityStatus != "OK": + playabilityReason = metadata['playabilityStatus'].get('reason', + '//'.join(metadata['playabilityStatus'].get('messages',[]))) + player_error = f"{playabilityStatus}: {playabilityReason}" + #if playabilityStatus == "UNPLAYABLE": XXX: do we need that still? + if playabilityStatus == "LOGIN_REQUIRED" and metadata['playabilityStatus'].get('reason') == "Sign in to confirm your age" and sts != 0: + r = requests.get("https://www.youtube.com/get_video_info?html5=1&video_id="+video_id, { + "video_id": video_id, + "eurl": f"https://youtube.googleapis.com/v/{video_id}", + "el": "embedded", + "sts": sts, + "hl": "en_US", + }) + params = parse_qs(r.text) + if 'errorcode' in params: # status=fail + return None, None, None, 'malformed', params['reason'][0] + from flask import current_app + current_app.logger.error(r.text) + metadata = json.loads(params.get('player_response')[0]) + + # without videoDetails, there's only the error message + maybe_metadata = metadata if 'videoDetails' in metadata else None + return None, None, maybe_metadata, 'player', player_error + + # livestreams have no adaptive/muxed formats: + is_live = metadata['videoDetails'].get('isLive', False) + + if not 'formats' in metadata['streamingData'] and not is_live: + return None, None, metadata, 'no-url', player_error + + formats = metadata['streamingData'].get('formats',[]) + for (i,v) in enumerate(formats): + if not ('cipher' in v or 'signatureCipher' in v): continue + cipher = parse_qs(v.get('cipher') or v.get('signatureCipher')) + formats[i]['url'] = unscramble(cipher, algo) + + adaptive = metadata['streamingData'].get('adaptiveFormats',[]) + for (i,v) in enumerate(adaptive): + if not ('cipher' in v or 'signatureCipher' in v): continue + cipher = parse_qs(v.get('cipher') or v.get('signatureCipher')) + adaptive[i]['url'] = unscramble(cipher, algo) + + stream_map = { + 'adaptive': adaptive, 'muxed': formats, + 'hlsManifestUrl': metadata['streamingData'].get('hlsManifestUrl'), + } + + url = sorted(formats, key=lambda k: k['height'], reverse=True)[0]['url'] \ + if not is_live else None + + # ip-locked videos can be recovered if the proxy module is loaded: + is_geolocked = 'gcr' in parse_qs(urlparse(url).query) + + nonfatal = 'livestream' if is_live \ + else 'geolocked' if is_geolocked \ + else None + + return url, stream_map, metadata, nonfatal, None def unscramble(cipher, algo): signature = list(cipher['s'][0]) -- 2.39.3