From bf9f6e35cca54fbc1b431e1008d53ab5b68726f0 Mon Sep 17 00:00:00 2001 From: girst Date: Thu, 14 Oct 2021 23:35:25 +0200 Subject: [PATCH] switch to ANDROID player API provides unthrottled and even unscrambled streams. we do have to give up some metadata: category, availableCountries, infocards, published. --- app/common/common.py | 48 ++++++------------ app/common/innertube.py | 16 +++--- app/youtube/__init__.py | 21 +------- app/youtube/lib.py | 75 ++++++++--------------------- app/youtube/templates/watch.html.j2 | 17 +------ 5 files changed, 43 insertions(+), 134 deletions(-) diff --git a/app/common/common.py b/app/common/common.py index 59bb4e4..81091ae 100644 --- a/app/common/common.py +++ b/app/common/common.py @@ -116,7 +116,7 @@ def update_channel(db, xmldata, from_webhook=False): c.execute("SELECT 1 FROM videos WHERE id=?",(video['video_id'],)) new_video = len(c.fetchall()) < 1 if new_video: - _, _, meta, _, _ = get_video_info(video['video_id']) + _, _, meta, _, _ = get_video_info(video['video_id'], metaOnly=True) # The 'published' timestamp sent in websub POSTs are often wrong (e.g.: # video gets uploaded as unlisted on day A and set to public on day B; # the webhook is sent on day B, but 'published' says A. The video @@ -199,7 +199,7 @@ def update_channel(db, xmldata, from_webhook=False): return True -def get_video_info(video_id, sts=0, algo="", _embed=True): +def get_video_info(video_id, *, metaOnly=False, _embed=True): """ returns: best-quality muxed video stream, stream map, player_response, error-type/mesage error types: player, malformed, livestream, geolocked, agegated, no-url, exhausted @@ -212,19 +212,20 @@ def get_video_info(video_id, sts=0, algo="", _embed=True): today = datetime.now(timezone.utc).strftime("%Y%m%d") # XXX: anticaptcha hasn't been adapted # XXX: this is not cached any more! 
- r = requests.post("https://www.youtube-nocookie.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={ + key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" + r = requests.post("https://www.youtube-nocookie.com/youtubei/v1/player", params={'key': key}, json={ 'videoId': video_id, 'context': { 'client': { 'gl': 'US', 'hl': 'en', - 'clientName': 'WEB', - 'clientVersion': f'2.{today}.01.01', + # ANDROID returns streams that are not throttled or cipher-scrambled, but less metadata than WEB + 'clientName': 'ANDROID' if not metaOnly else 'WEB', + 'clientVersion': f'16.20' if not metaOnly else f'2.{today}.01.01', **({'clientScreen': 'EMBED'} if _embed else {}), }, 'thirdParty': {'embedUrl': 'https://google.com'} }, - 'playbackContext': {'contentPlaybackContext': {'signatureTimestamp': sts}} }, cookies=cookies) if not r or r.status_code == 429: @@ -243,11 +244,11 @@ def get_video_info(video_id, sts=0, algo="", _embed=True): if (playabilityStatus == "UNPLAYABLE" and 'proceedButton' in metadata['playabilityStatus'] \ .get('errorScreen',{}).get('playerErrorMessageRenderer',{}) - and sts != 0 # only need metadata when no sts (via pubsubhubbub) + and not metaOnly # only need metadata (e.g. 
called from pubsubhubbub) and _embed ): - _, _, metadata_embed, error_embed, errormsg_embed = get_video_info(video_id, sts, algo, _embed=False) + _, _, metadata_embed, error_embed, errormsg_embed = get_video_info(video_id, _embed=False) if not error_embed or error_embed in ('livestream','geolocked'): metadata = metadata_embed elif errormsg_embed == "LOGIN_REQUIRED: Sign in to confirm your age": @@ -266,17 +267,7 @@ def get_video_info(video_id, sts=0, algo="", _embed=True): return None, None, metadata, 'no-url', player_error formats = metadata['streamingData'].get('formats',[]) - for (i,v) in enumerate(formats): - if not ('cipher' in v or 'signatureCipher' in v): continue - cipher = parse_qs(v.get('cipher') or v.get('signatureCipher')) - formats[i]['url'] = unscramble(cipher, algo) - adaptive = metadata['streamingData'].get('adaptiveFormats',[]) - for (i,v) in enumerate(adaptive): - if not ('cipher' in v or 'signatureCipher' in v): continue - cipher = parse_qs(v.get('cipher') or v.get('signatureCipher')) - adaptive[i]['url'] = unscramble(cipher, algo) - stream_map = { 'adaptive_video': [a for a in adaptive if a['mimeType'].startswith('video/')], 'adaptive_audio': [a for a in adaptive if a['mimeType'].startswith('audio/')], @@ -296,31 +287,20 @@ def get_video_info(video_id, sts=0, algo="", _embed=True): return url, stream_map, metadata, nonfatal, None -def unscramble(cipher, algo): - signature = list(cipher['s'][0]) - for c in algo.split(): - op, ix = re.match(r"([rsw])(\d+)?", c).groups() - ix = int(ix) % len(signature) if ix else 0 - if op == 'r': signature = list(reversed(signature)) - if op == 's': signature = signature[ix:] - if op == 'w': signature[0], signature[ix] = signature[ix], signature[0] - sp = cipher.get('sp', ['signature'])[0] - sig = cipher.get('sig', [''.join(signature)])[0] - return f"{cipher['url'][0]}&{sp}={sig}" - def video_metadata(metadata): if not metadata: return {} meta1 = metadata['videoDetails'] - meta2 = 
metadata['microformat']['playerMicroformatRenderer'] + # With ANDROID player API, we don't get microformat => no publishDate! + meta2 = metadata.get('microformat',{}).get('playerMicroformatRenderer',{}) # sometimes, we receive the notification so early that the length is not # yet populated. Nothing we can do about it. - length = int(meta2['lengthSeconds']) or int(meta1['lengthSeconds']) or None + length = int(meta1.get('lengthSeconds',0)) or None published_at = meta2.get('liveBroadcastDetails',{}) \ - .get('startTimestamp', f"{meta2['publishDate']}T00:00:00Z") + .get('startTimestamp', f"{meta2.get('publishDate','1970-01-01')}T00:00:00Z") # Note: 'premiere' videos have livestream=False and published= will be the # start of the premiere. @@ -341,7 +321,7 @@ def store_video_metadata(video_id): c.execute("SELECT 1 from videos where id = ?", (video_id,)) new_video = len(c.fetchall()) < 1 if new_video: - _, _, meta, _, _ = get_video_info(video_id) + _, _, meta, _, _ = get_video_info(video_id, metaOnly=True) if meta: meta = video_metadata(meta) c.execute(""" diff --git a/app/common/innertube.py b/app/common/innertube.py index 8804696..057939a 100644 --- a/app/common/innertube.py +++ b/app/common/innertube.py @@ -289,32 +289,32 @@ def parse_endcard(card): if ctype == "CHANNEL": return {'type': ctype, 'content': { 'channel_id': card['endpoint']['browseEndpoint']['browseId'], - 'title': card['title']['simpleText'], + 'title': card['title']|G.text, 'icons': mkthumbs(card['image']['thumbnails']), }} elif ctype == "VIDEO": if not 'endpoint' in card: return None # title == "This video is unavailable." 
return {'type': ctype, 'content': { 'video_id': card['endpoint']['watchEndpoint']['videoId'], - 'title': card['title']['simpleText'], - 'length': card['videoDuration']['simpleText'], # '12:21' - 'views': toInt(card['metadata']['simpleText']), + 'title': card['title']|G.text, + 'length': card['videoDuration']|G.text, # '12:21' + 'views': toInt(card['metadata']|G.text), # XXX: no channel name }} elif ctype == "PLAYLIST": return {'type': ctype, 'content': { 'playlist_id': card['endpoint']['watchEndpoint']['playlistId'], 'video_id': card['endpoint']['watchEndpoint']['videoId'], - 'title': card['title']['simpleText'], - 'author': delL(card['metadata']['simpleText']), - 'n_videos': toInt(card['playlistLength']['simpleText']), + 'title': card['title']|G.text, + 'author': delL(card['metadata']|G.text), + 'n_videos': toInt(card['playlistLength']|G.text), }} elif ctype == "WEBSITE" or ctype == "CREATOR_MERCHANDISE": url = clean_url(card['endpoint']['urlEndpoint']['url']) return {'type': "WEBSITE", 'content': { 'url': url, 'domain': urlparse(url).netloc, - 'title': card['title']['simpleText'], + 'title': card['title']|G.text, 'icons': mkthumbs(card['image']['thumbnails']), }} else: diff --git a/app/youtube/__init__.py b/app/youtube/__init__.py index 23e685c..052de8e 100644 --- a/app/youtube/__init__.py +++ b/app/youtube/__init__.py @@ -77,8 +77,7 @@ def watch(): } video_id = request.args.get('v') - sts, algo = get_cipher() - video_url, stream_map, metadata, error, errdetails = get_video_info(video_id, sts, algo) + video_url, stream_map, metadata, error, errdetails = get_video_info(video_id) extra = {'geolocked':'local=1', 'livestream':'raw=0'}.get(error,'') invidious_url = f"https://invidious.snopyta.org/watch?v={video_id}&{extra}" @@ -385,24 +384,6 @@ def redirect_youtube_dot_com(state): strict_slashes=False ) -def get_cipher(): - # reload cipher from database every 1 hour - if 'cipher' not in g or time.time() - g.get('cipher_updated', 0) > 1 * 60 * 60: - with 
sqlite3.connect(cf['global']['database']) as conn: - c = conn.cursor() - c.execute("SELECT sts, algorithm FROM cipher") - g.cipher = c.fetchone() - g.cipher_updated = time.time() - - return g.cipher - -#@frontend.teardown_appcontext -#def teardown_db(): -# db = g.pop('db', None) -# -# if db is not None: -# db.close() - def undo_flash(thing_id, action): undo_action, past_action = { 'pin': ('unpin', 'pinned'), diff --git a/app/youtube/lib.py b/app/youtube/lib.py index 8d688df..a1ac770 100644 --- a/app/youtube/lib.py +++ b/app/youtube/lib.py @@ -3,22 +3,19 @@ import requests from urllib.parse import urlparse from ..common.common import video_metadata -from ..common.innertube import prepare_infocards, prepare_endcards +from ..common.innertube import prepare_infocards, prepare_endcards, G def prepare_metadata(metadata): - meta1 = metadata['videoDetails'] - meta2 = metadata['microformat']['playerMicroformatRenderer'] + meta = metadata['videoDetails'] # the actual video streams have exact information: try: sd = metadata['streamingData'] some_stream = (sd.get('adaptiveFormats',[]) + sd.get('formats',[]))[0] aspect_ratio = some_stream['width'] / some_stream['height'] - # if that's unavailable (e.g. on livestreams), fall back to - # thumbnails (only either 4:3 or 16:9). + # if that's unavailable (e.g. on livestreams), fall back to 16:9 except: - some_img = meta2['thumbnail']['thumbnails'][0] - aspect_ratio = some_img['width'] / some_img['height'] + aspect_ratio = 16/9 # Note: we could get subtitles in multiple formats directly by querying # https://video.google.com/timedtext?hl=en&type=list&v= followed by @@ -27,80 +24,46 @@ def prepare_metadata(metadata): # we can still add &fmt= to the extracted URLs below (first one takes precedence). 
try: # find the native language captions (assuming there is only 1 audioTrack) (any level might not exist): default_track = metadata.get('captions',{}).get('playerCaptionsTracklistRenderer',{}).get('defaultAudioTrackIndex', 0) - main_subtitle = metadata['captions']['playerCaptionsTracklistRenderer']['audioTracks'][default_track]['defaultCaptionTrackIndex'] + main_subtitle = metadata['captions']['playerCaptionsTracklistRenderer']['audioTracks'][default_track]['captionTrackIndices'] except: main_subtitle = -1 subtitles = sorted([ {'url':cc['baseUrl'], 'code':cc['languageCode'], 'autogenerated':cc.get('kind')=="asr", - 'name':cc['name']['simpleText'], + 'name':cc['name']|G.text, 'default':i==main_subtitle, 'query':"fmt=vtt&"+urlparse(cc['baseUrl']).query} # for our internal proxy - for i,cc in enumerate(metadata.get('captions',{}) - .get('playerCaptionsTracklistRenderer',{}) - .get('captionTracks',[])) + for i,cc in enumerate(metadata|G('captions') + |G('playerCaptionsTracklistRenderer') + |G('captionTracks') or []) # sort order: default lang gets weight 0 (first), other manually translated weight 1, autogenerated weight 2: ], key=lambda cc: (not cc['default']) + cc['autogenerated']) - infocards = prepare_infocards(metadata) endcards = prepare_endcards(metadata) - # combine cards to weed out duplicates. for videos and playlists prefer - # infocards, for channels and websites prefer endcards, as those have more - # information than the other. 
- # if the card type is not in ident, we use the whole card for comparison - # (otherwise they'd all replace each other) - ident = { # ctype -> ident - 'VIDEO': 'video_id', - 'PLAYLIST': 'playlist_id', - 'CHANNEL': 'channel_id', - 'WEBSITE': 'url', - 'POLL': 'question', - } - getident = lambda c: c['content'].get(ident.get(c['type']), c) - mkexclude = lambda cards, types: [getident(c) for c in cards if c['type'] in types] - exclude = lambda cards, without: [c for c in cards if getident(c) not in without] - - allcards = exclude(infocards, mkexclude(endcards, ['CHANNEL','WEBSITE'])) + \ - exclude(endcards, mkexclude(infocards, ['VIDEO','PLAYLIST'])) - - all_countries = """AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ BA BB BD - BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BV BW BY BZ CA CC CD CF CG CH - CI CK CL CM CN CO CR CU CV CW CX CY CZ DE DJ DK DM DO DZ EC EE EG EH ER - ES ET FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GS GT - GU GW GY HK HM HN HR HT HU ID IE IL IM IN IO IQ IR IS IT JE JM JO JP KE - KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD - ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC NE NF - NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PN PR PS PT PW PY QA - RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR SS ST SV SX - SY SZ TC TD TF TG TH TJ TK TL TM TN TO TR TT TV TW TZ UA UG UM US UY UZ - VA VC VE VG VI VN VU WF WS YE YT ZA ZM ZW""".split() - whitelisted = sorted(meta2.get('availableCountries',[])) - blacklisted = sorted(set(all_countries) - set(whitelisted)) # the rating goes from 1 to 5, and is the ratio of up- to down votes, plus 1 - if meta1['averageRating'] != 0: - thumbs_up = 100 * (meta1['averageRating']-1) / 4 # reconstructed ratio + if meta['averageRating'] != 0: + thumbs_up = 100 * (meta['averageRating']-1) / 4 # reconstructed ratio thumbs_dn = 100 - thumbs_up else: # no thumbs given thumbs_up = 0 thumbs_dn = 0 + thumbs = meta['thumbnail']['thumbnails'] + poster 
= sorted(thumbs, key=lambda t: t['width'], reverse=True)[0]['url'] + return { **video_metadata(metadata), - 'description': meta1['shortDescription'], - 'rating': meta1['averageRating'], + 'description': meta['shortDescription'], + 'rating': meta['averageRating'], 'thumbs_up': thumbs_up, 'thumbs_dn': thumbs_dn, - 'category': meta2['category'], 'aspectr': aspect_ratio, - 'unlisted': meta2['isUnlisted'], - 'whitelisted': whitelisted, - 'blacklisted': blacklisted, - 'poster': meta2['thumbnail']['thumbnails'][0]['url'], - 'infocards': infocards, + 'unlisted': not meta['isCrawlable'], + 'poster': poster, 'endcards': endcards, - 'all_cards': allcards, + 'all_cards': endcards, 'subtitles': subtitles, } diff --git a/app/youtube/templates/watch.html.j2 b/app/youtube/templates/watch.html.j2 index 21c7fb1..d47cdda 100644 --- a/app/youtube/templates/watch.html.j2 +++ b/app/youtube/templates/watch.html.j2 @@ -91,8 +91,6 @@ var sha256=function a(b){function c(a,b){return a>>>b|a<<32-b}for(var d,e,f=Math
{{ length | format_time }}
Views
{{ '{0:,}'.format(views | int)|replace(",","'") }} -
Published -
{{ published.split('T')[0] }}
Rating {% if rating == 0 %}
n/a @@ -101,19 +99,6 @@ var sha256=function a(b){function c(a,b){return a>>>b|a<<32-b}for(var d,e,f=Math {% endif %}
Visibility
{{ 'unlisted' if unlisted else 'public' }} - {% if blacklisted|length == 0 %} -
Available in -
all regions - {% elif whitelisted|length == 0 %} -
Blacklisted in -
all regions - {% elif blacklisted|length > whitelisted|length %} -
Available in -
{{ whitelisted | join(', ') }} - {% else %} -
Blocked in -
{{ blacklisted | join(', ') }} - {% endif %} @@ -133,7 +118,7 @@ var sha256=function a(b){function c(a,b){return a>>>b|a<<32-b}for(var d,e,f=Math -
Info- and Endcards +
Endcards
{% for card in all_cards %} {# Note: no point in displaying the current channels's channel card #} {{ macros.typed_card(card) if not (card.type == 'CHANNEL' and card.content.channel_id == channel_id) }} -- 2.39.3