From bf9f6e35cca54fbc1b431e1008d53ab5b68726f0 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Thu, 14 Oct 2021 23:35:25 +0200
Subject: [PATCH] switch to ANDROID player API

provides unthrottled and even unscrambled streams. we do have to give
up some metadata: category, availableCountries, infocards, published.
---
 app/common/common.py                | 48 ++++++------------
 app/common/innertube.py             | 16 +++---
 app/youtube/__init__.py             | 21 +-------
 app/youtube/lib.py                  | 75 ++++++++---------------------
 app/youtube/templates/watch.html.j2 | 17 +------
 5 files changed, 43 insertions(+), 134 deletions(-)

diff --git a/app/common/common.py b/app/common/common.py
index 59bb4e4..81091ae 100644
--- a/app/common/common.py
+++ b/app/common/common.py
@@ -116,7 +116,7 @@ def update_channel(db, xmldata, from_webhook=False):
         c.execute("SELECT 1 FROM videos WHERE id=?",(video['video_id'],))
         new_video = len(c.fetchall()) < 1
         if new_video:
-            _, _, meta, _, _ = get_video_info(video['video_id'])
+            _, _, meta, _, _ = get_video_info(video['video_id'], metaOnly=True)
             # The 'published' timestamp sent in websub POSTs are often wrong (e.g.:
             # video gets uploaded as unlisted on day A and set to public on day B;
             # the webhook is sent on day B, but 'published' says A. The video
@@ -199,7 +199,7 @@ def update_channel(db, xmldata, from_webhook=False):
 
     return True
 
-def get_video_info(video_id, sts=0, algo="", _embed=True):
+def get_video_info(video_id, *, metaOnly=False, _embed=True):
     """
     returns: best-quality muxed video stream, stream map, player_response, error-type/mesage
     error types: player, malformed, livestream, geolocked, agegated, no-url, exhausted
@@ -212,19 +212,20 @@ def get_video_info(video_id, sts=0, algo="", _embed=True):
     today = datetime.now(timezone.utc).strftime("%Y%m%d")
     # XXX: anticaptcha hasn't been adapted
     # XXX: this is not cached any more!
-    r = requests.post("https://www.youtube-nocookie.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={
+    key = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"
+    r = requests.post("https://www.youtube-nocookie.com/youtubei/v1/player", params={'key': key}, json={
         'videoId': video_id,
         'context': {
             'client': {
                 'gl': 'US',
                 'hl': 'en',
-                'clientName': 'WEB',
-                'clientVersion': f'2.{today}.01.01',
+                # ANDROID returns streams that are not throttled or cipher-scrambled, but less metadata than WEB
+                'clientName': 'ANDROID' if not metaOnly else 'WEB',
+                'clientVersion': f'16.20' if not metaOnly else f'2.{today}.01.01',
                 **({'clientScreen': 'EMBED'} if _embed else {}),
             },
             'thirdParty': {'embedUrl': 'https://google.com'}
         },
-        'playbackContext': {'contentPlaybackContext': {'signatureTimestamp': sts}}
     }, cookies=cookies)
 
     if not r or r.status_code == 429:
@@ -243,11 +244,11 @@ def get_video_info(video_id, sts=0, algo="", _embed=True):
         if (playabilityStatus == "UNPLAYABLE" and
             'proceedButton' in metadata['playabilityStatus'] \
                 .get('errorScreen',{}).get('playerErrorMessageRenderer',{})
-            and sts != 0 # only need metadata when no sts (via pubsubhubbub)
+            and not metaOnly # only need metadata (e.g. called from pubsubhubbub)
             and _embed
 
         ):
-            _, _, metadata_embed, error_embed, errormsg_embed = get_video_info(video_id, sts, algo, _embed=False)
+            _, _, metadata_embed, error_embed, errormsg_embed = get_video_info(video_id, _embed=False)
             if not error_embed or error_embed in ('livestream','geolocked'):
                 metadata = metadata_embed
             elif errormsg_embed == "LOGIN_REQUIRED: Sign in to confirm your age":
@@ -266,17 +267,7 @@ def get_video_info(video_id, sts=0, algo="", _embed=True):
         return None, None, metadata, 'no-url', player_error
 
     formats = metadata['streamingData'].get('formats',[])
-    for (i,v) in enumerate(formats):
-        if not ('cipher' in v or 'signatureCipher' in v): continue
-        cipher = parse_qs(v.get('cipher') or v.get('signatureCipher'))
-        formats[i]['url'] = unscramble(cipher, algo)
-
     adaptive = metadata['streamingData'].get('adaptiveFormats',[])
-    for (i,v) in enumerate(adaptive):
-        if not ('cipher' in v or 'signatureCipher' in v): continue
-        cipher = parse_qs(v.get('cipher') or v.get('signatureCipher'))
-        adaptive[i]['url'] = unscramble(cipher, algo)
-
     stream_map = {
         'adaptive_video': [a for a in adaptive if a['mimeType'].startswith('video/')],
         'adaptive_audio': [a for a in adaptive if a['mimeType'].startswith('audio/')],
@@ -296,31 +287,20 @@ def get_video_info(video_id, sts=0, algo="", _embed=True):
 
     return url, stream_map, metadata, nonfatal, None
 
-def unscramble(cipher, algo):
-    signature = list(cipher['s'][0])
-    for c in algo.split():
-        op, ix = re.match(r"([rsw])(\d+)?", c).groups()
-        ix = int(ix) % len(signature) if ix else 0
-        if op == 'r': signature = list(reversed(signature))
-        if op == 's': signature = signature[ix:]
-        if op == 'w': signature[0], signature[ix] = signature[ix], signature[0]
-    sp = cipher.get('sp', ['signature'])[0]
-    sig = cipher.get('sig', [''.join(signature)])[0]
-    return f"{cipher['url'][0]}&{sp}={sig}"
-
 def video_metadata(metadata):
     if not metadata:
         return {}
 
     meta1 = metadata['videoDetails']
-    meta2 = metadata['microformat']['playerMicroformatRenderer']
+    # With ANDROID player API, we don't get microformat => no publishDate!
+    meta2 = metadata.get('microformat',{}).get('playerMicroformatRenderer',{})
 
     # sometimes, we receive the notification so early that the length is not
     # yet populated. Nothing we can do about it.
-    length = int(meta2['lengthSeconds']) or int(meta1['lengthSeconds']) or None
+    length = int(meta1.get('lengthSeconds',0)) or None
 
     published_at = meta2.get('liveBroadcastDetails',{}) \
-        .get('startTimestamp', f"{meta2['publishDate']}T00:00:00Z")
+        .get('startTimestamp', f"{meta2.get('publishDate','1970-01-01')}T00:00:00Z")
 
     # Note: 'premiere' videos have livestream=False and published= will be the
     # start of the premiere.
@@ -341,7 +321,7 @@ def store_video_metadata(video_id):
         c.execute("SELECT 1 from videos where id = ?", (video_id,))
         new_video = len(c.fetchall()) < 1
         if new_video:
-            _, _, meta, _, _ = get_video_info(video_id)
+            _, _, meta, _, _ = get_video_info(video_id, metaOnly=True)
             if meta:
                 meta = video_metadata(meta)
                 c.execute("""
diff --git a/app/common/innertube.py b/app/common/innertube.py
index 8804696..057939a 100644
--- a/app/common/innertube.py
+++ b/app/common/innertube.py
@@ -289,32 +289,32 @@ def parse_endcard(card):
     if ctype == "CHANNEL":
         return {'type': ctype, 'content': {
             'channel_id': card['endpoint']['browseEndpoint']['browseId'],
-            'title': card['title']['simpleText'],
+            'title': card['title']|G.text,
             'icons': mkthumbs(card['image']['thumbnails']),
         }}
     elif ctype == "VIDEO":
         if not 'endpoint' in card: return None # title == "This video is unavailable."
         return {'type': ctype, 'content': {
             'video_id': card['endpoint']['watchEndpoint']['videoId'],
-            'title': card['title']['simpleText'],
-            'length': card['videoDuration']['simpleText'],  # '12:21'
-            'views': toInt(card['metadata']['simpleText']),
+            'title': card['title']|G.text,
+            'length': card['videoDuration']|G.text,  # '12:21'
+            'views': toInt(card['metadata']|G.text),
             # XXX: no channel name
         }}
     elif ctype == "PLAYLIST":
         return {'type': ctype, 'content': {
             'playlist_id': card['endpoint']['watchEndpoint']['playlistId'],
             'video_id': card['endpoint']['watchEndpoint']['videoId'],
-            'title': card['title']['simpleText'],
-            'author': delL(card['metadata']['simpleText']),
-            'n_videos': toInt(card['playlistLength']['simpleText']),
+            'title': card['title']|G.text,
+            'author': delL(card['metadata']|G.text),
+            'n_videos': toInt(card['playlistLength']|G.text),
         }}
     elif ctype == "WEBSITE" or ctype == "CREATOR_MERCHANDISE":
         url = clean_url(card['endpoint']['urlEndpoint']['url'])
         return {'type': "WEBSITE", 'content': {
             'url': url,
             'domain': urlparse(url).netloc,
-            'title': card['title']['simpleText'],
+            'title': card['title']|G.text,
             'icons': mkthumbs(card['image']['thumbnails']),
         }}
     else:
diff --git a/app/youtube/__init__.py b/app/youtube/__init__.py
index 23e685c..052de8e 100644
--- a/app/youtube/__init__.py
+++ b/app/youtube/__init__.py
@@ -77,8 +77,7 @@ def watch():
     }
 
     video_id = request.args.get('v')
-    sts, algo = get_cipher()
-    video_url, stream_map, metadata, error, errdetails = get_video_info(video_id, sts, algo)
+    video_url, stream_map, metadata, error, errdetails = get_video_info(video_id)
 
     extra = {'geolocked':'local=1', 'livestream':'raw=0'}.get(error,'')
     invidious_url = f"https://invidious.snopyta.org/watch?v={video_id}&{extra}"
@@ -385,24 +384,6 @@ def redirect_youtube_dot_com(state):
                         strict_slashes=False
                     )
 
-def get_cipher():
-    # reload cipher from database every 1 hour
-    if 'cipher' not in g or time.time() - g.get('cipher_updated', 0) > 1 * 60 * 60:
-        with sqlite3.connect(cf['global']['database']) as conn:
-            c = conn.cursor()
-            c.execute("SELECT sts, algorithm FROM cipher")
-            g.cipher = c.fetchone()
-            g.cipher_updated = time.time()
-
-    return g.cipher
-
-#@frontend.teardown_appcontext
-#def teardown_db():
-#    db = g.pop('db', None)
-#
-#    if db is not None:
-#        db.close()
-
 def undo_flash(thing_id, action):
     undo_action, past_action = {
         'pin': ('unpin', 'pinned'),
diff --git a/app/youtube/lib.py b/app/youtube/lib.py
index 8d688df..a1ac770 100644
--- a/app/youtube/lib.py
+++ b/app/youtube/lib.py
@@ -3,22 +3,19 @@ import requests
 from urllib.parse import urlparse
 
 from ..common.common import video_metadata
-from ..common.innertube import prepare_infocards, prepare_endcards
+from ..common.innertube import prepare_infocards, prepare_endcards, G
 
 def prepare_metadata(metadata):
-    meta1 = metadata['videoDetails']
-    meta2 = metadata['microformat']['playerMicroformatRenderer']
+    meta = metadata['videoDetails']
 
     # the actual video streams have exact information:
     try:
         sd = metadata['streamingData']
         some_stream = (sd.get('adaptiveFormats',[]) + sd.get('formats',[]))[0]
         aspect_ratio = some_stream['width'] / some_stream['height']
-    # if that's unavailable (e.g. on livestreams), fall back to
-    # thumbnails (only either 4:3 or 16:9).
+    # if that's unavailable (e.g. on livestreams), fall back to 16:9
     except:
-        some_img = meta2['thumbnail']['thumbnails'][0]
-        aspect_ratio = some_img['width'] / some_img['height']
+        aspect_ratio = 16/9
 
     # Note: we could get subtitles in multiple formats directly by querying
     # https://video.google.com/timedtext?hl=en&type=list&v=<VIDEO_ID> followed by
@@ -27,80 +24,46 @@ def prepare_metadata(metadata):
     # we can still add &fmt= to the extracted URLs below (first one takes precedence).
     try: # find the native language captions (assuming there is only 1 audioTrack) (any level might not exist):
         default_track = metadata.get('captions',{}).get('playerCaptionsTracklistRenderer',{}).get('defaultAudioTrackIndex', 0)
-        main_subtitle = metadata['captions']['playerCaptionsTracklistRenderer']['audioTracks'][default_track]['defaultCaptionTrackIndex']
+        main_subtitle = metadata['captions']['playerCaptionsTracklistRenderer']['audioTracks'][default_track]['captionTrackIndices']
     except:
         main_subtitle = -1
     subtitles = sorted([
         {'url':cc['baseUrl'],
          'code':cc['languageCode'],
          'autogenerated':cc.get('kind')=="asr",
-         'name':cc['name']['simpleText'],
+         'name':cc['name']|G.text,
          'default':i==main_subtitle,
          'query':"fmt=vtt&"+urlparse(cc['baseUrl']).query} # for our internal proxy
-        for i,cc in enumerate(metadata.get('captions',{})
-            .get('playerCaptionsTracklistRenderer',{})
-            .get('captionTracks',[]))
+        for i,cc in enumerate(metadata|G('captions')
+            |G('playerCaptionsTracklistRenderer')
+            |G('captionTracks') or [])
     # sort order: default lang gets weight 0 (first), other manually translated weight 1, autogenerated weight 2:
     ], key=lambda cc: (not cc['default']) + cc['autogenerated'])
 
-    infocards = prepare_infocards(metadata)
     endcards = prepare_endcards(metadata)
-    # combine cards to weed out duplicates. for videos and playlists prefer
-    # infocards, for channels and websites prefer endcards, as those have more
-    # information than the other.
-    # if the card type is not in ident, we use the whole card for comparison
-    # (otherwise they'd all replace each other)
-    ident = { # ctype -> ident
-        'VIDEO': 'video_id',
-        'PLAYLIST': 'playlist_id',
-        'CHANNEL': 'channel_id',
-        'WEBSITE': 'url',
-        'POLL': 'question',
-    }
-    getident = lambda c: c['content'].get(ident.get(c['type']), c)
-    mkexclude = lambda cards, types: [getident(c) for c in cards if c['type'] in types]
-    exclude = lambda cards, without: [c for c in cards if getident(c) not in without]
-
-    allcards = exclude(infocards, mkexclude(endcards,  ['CHANNEL','WEBSITE'])) + \
-               exclude(endcards,  mkexclude(infocards, ['VIDEO','PLAYLIST']))
-
-    all_countries = """AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ BA BB BD
-        BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BV BW BY BZ CA CC CD CF CG CH
-        CI CK CL CM CN CO CR CU CV CW CX CY CZ DE DJ DK DM DO DZ EC EE EG EH ER
-        ES ET FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GS GT
-        GU GW GY HK HM HN HR HT HU ID IE IL IM IN IO IQ IR IS IT JE JM JO JP KE
-        KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD
-        ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC NE NF
-        NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PN PR PS PT PW PY QA
-        RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR SS ST SV SX
-        SY SZ TC TD TF TG TH TJ TK TL TM TN TO TR TT TV TW TZ UA UG UM US UY UZ
-        VA VC VE VG VI VN VU WF WS YE YT ZA ZM ZW""".split()
-    whitelisted = sorted(meta2.get('availableCountries',[]))
-    blacklisted = sorted(set(all_countries) - set(whitelisted))
 
     # the rating goes from 1 to 5, and is the ratio of up- to down votes, plus 1
-    if meta1['averageRating'] != 0:
-        thumbs_up = 100 * (meta1['averageRating']-1) / 4  # reconstructed ratio
+    if meta['averageRating'] != 0:
+        thumbs_up = 100 * (meta['averageRating']-1) / 4  # reconstructed ratio
         thumbs_dn = 100 - thumbs_up
     else:  # no thumbs given
         thumbs_up = 0
         thumbs_dn = 0
 
+    thumbs = meta['thumbnail']['thumbnails']
+    poster = sorted(thumbs, key=lambda t: t['width'], reverse=True)[0]['url']
+
     return {
         **video_metadata(metadata),
-        'description': meta1['shortDescription'],
-        'rating': meta1['averageRating'],
+        'description': meta['shortDescription'],
+        'rating': meta['averageRating'],
         'thumbs_up': thumbs_up,
         'thumbs_dn': thumbs_dn,
-        'category': meta2['category'],
         'aspectr': aspect_ratio,
-        'unlisted': meta2['isUnlisted'],
-        'whitelisted': whitelisted,
-        'blacklisted': blacklisted,
-        'poster': meta2['thumbnail']['thumbnails'][0]['url'],
-        'infocards': infocards,
+        'unlisted': not meta['isCrawlable'],
+        'poster': poster,
         'endcards': endcards,
-        'all_cards': allcards,
+        'all_cards': endcards,
         'subtitles': subtitles,
     }
 
diff --git a/app/youtube/templates/watch.html.j2 b/app/youtube/templates/watch.html.j2
index 21c7fb1..d47cdda 100644
--- a/app/youtube/templates/watch.html.j2
+++ b/app/youtube/templates/watch.html.j2
@@ -91,8 +91,6 @@ var sha256=function a(b){function c(a,b){return a>>>b|a<<32-b}for(var d,e,f=Math
 	<dd>{{ length | format_time }}
 	<dt>Views
 	<dd>{{ '{0:,}'.format(views | int)|replace(",","'") }}
-	<dt>Published
-	<dd>{{ published.split('T')[0] }}
 	<dt>Rating
 	{% if rating == 0 %}
 	<dd>n/a
@@ -101,19 +99,6 @@ var sha256=function a(b){function c(a,b){return a>>>b|a<<32-b}for(var d,e,f=Math
 	{% endif %}
 	<dt>Visibility
 	<dd>{{ 'unlisted' if unlisted else 'public' }}
-	{% if blacklisted|length == 0 %}
-	<dt>Available in
-	<dd>all regions
-	{% elif whitelisted|length == 0 %}
-	<dt>Blacklisted in
-	<dd>all regions
-	{% elif blacklisted|length > whitelisted|length %}
-	<dt>Available in
-	<dd>{{ whitelisted | join(', ') }}
-	{% else %}
-	<dt>Blocked in
-	<dd>{{ blacklisted | join(', ') }}
-	{% endif %}
 	</dl>
 </details>
 
@@ -133,7 +118,7 @@ var sha256=function a(b){function c(a,b){return a>>>b|a<<32-b}for(var d,e,f=Math
 	</ul>
 </details>
 
-<details><summary>Info- and Endcards</summary>
+<details><summary>Endcards</summary>
 	<div class="cards">
 	{% for card in all_cards %} {# Note: no point in displaying the current channels's channel card #}
 	{{ macros.typed_card(card) if not (card.type == 'CHANNEL' and card.content.channel_id == channel_id) }}
-- 
2.39.3