From e320952430abad72362837ef493291cca89aeea7 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Mon, 24 Apr 2023 21:59:30 +0000
Subject: [PATCH 01/16] [DATABASE CHANGE: Migration below] store is-shorts flag
 in subscriptions

this will allow us to filter shorts from the subscription feed.

ALTER TABLE videos ADD COLUMN shorts BOOLEAN DEFAULT NULL;
---
 app/common/common.py | 12 ++++++++----
 config/setup.sql     |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/app/common/common.py b/app/common/common.py
index fc5c0b2..32b6b86 100644
--- a/app/common/common.py
+++ b/app/common/common.py
@@ -129,6 +129,7 @@ def update_channel(db, xmldata, from_webhook=False):
             length = None
             livestream = None
             premiere = None
+            shorts = None
             if meta:
                 meta = video_metadata(meta)
                 published2 = dateutil.parser.parse(meta['published'])
@@ -137,6 +138,7 @@ def update_channel(db, xmldata, from_webhook=False):
                 length = meta['length']
                 livestream = meta['livestream']
                 premiere = meta['premiere']
+                shorts = meta['shorts']
 
             now = datetime.now(timezone.utc)
 
@@ -150,8 +152,8 @@ def update_channel(db, xmldata, from_webhook=False):
 
             c.execute("""
                 INSERT OR IGNORE INTO videos
-                    (id, channel_id, title, length, livestream, premiere, published, crawled)
-                VALUES (?, ?, ?, ?, ?, ?, datetime(?), datetime(?))
+                    (id, channel_id, title, length, livestream, premiere, shorts, published, crawled)
+                VALUES (?, ?, ?, ?, ?, ?, ?, datetime(?), datetime(?))
             """, (
                 video['video_id'],
                 video['channel_id'],
@@ -159,6 +161,7 @@ def update_channel(db, xmldata, from_webhook=False):
                 length,
                 livestream,
                 premiere,
+                shorts,
                 published,
                 timestamp
             ))
@@ -374,8 +377,8 @@ def store_video_metadata(video_id):
             if meta:
                 meta = video_metadata(meta)
                 c.execute("""
-                    INSERT OR IGNORE INTO videos (id, channel_id, title, length, livestream, premiere, published, crawled)
-                                   VALUES (?, ?, ?, ?, ?, ?, datetime(?), datetime(?))
+                    INSERT OR IGNORE INTO videos (id, channel_id, title, length, livestream, premiere, shorts, published, crawled)
+                                   VALUES (?, ?, ?, ?, ?, ?, ?, datetime(?), datetime(?))
                 """, (
                     video_id,
                     meta['channel_id'],
@@ -383,6 +386,7 @@ def store_video_metadata(video_id):
                     meta['length'],
                     meta['livestream'],
                     meta['premiere'],
+                    meta['shorts'],
                     meta['published'],
                     meta['published'],
                 ))
diff --git a/config/setup.sql b/config/setup.sql
index 4514e83..1f76da4 100644
--- a/config/setup.sql
+++ b/config/setup.sql
@@ -21,6 +21,7 @@ CREATE TABLE IF NOT EXISTS videos(
 	length INTEGER,
 	livestream BOOLEAN DEFAULT 0,
 	premiere BOOLEAN DEFAULT 0,
+	shorts BOOLEAN DEFAULT NULL,
 	published DATETIME,
 	crawled DATETIME DEFAULT CURRENT_TIMESTAMP);
 CREATE TABLE IF NOT EXISTS playlist_videos(
-- 
2.39.3


From 87b0fc1556707f76cd30bee6207ef1187becf40e Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Tue, 25 Apr 2023 16:08:34 +0000
Subject: [PATCH 02/16] improve shorts detection

if only one of length>60 or aspect>1 is available we can rule out a
shorts video. previously, we marked this state as undeterminable (NULL).
---
 app/common/common.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/common/common.py b/app/common/common.py
index 32b6b86..8f8ac42 100644
--- a/app/common/common.py
+++ b/app/common/common.py
@@ -346,9 +346,9 @@ def video_metadata(metadata):
     # shorts are <= 60 seconds and vertical or square. if we were unable to
     # determine it, we set it to None.
     is_short = (
-        None if length is None or aspect_ratio is None else
-        True if length <= 60 and aspect_ratio <= 1 else
-        False
+        None if length is None and aspect_ratio is None else
+        True if ((length or 61) <= 60) and ((aspect_ratio or 2) <= 1) else
+        False # length > 60 or aspect_ratio > 1
     )
 
     # Note: 'premiere' videos have livestream=False and published= will be the
-- 
2.39.3


From a80f6344c34a916878d6a3c337ef7bb3177d36b1 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Tue, 25 Apr 2023 18:06:48 +0000
Subject: [PATCH 03/16] fix dismissing multiple flash()es

previously, dismissing one also dismissed all below.
---
 app/static/style.css | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/static/style.css b/app/static/style.css
index 18a8f50..ef73d0d 100644
--- a/app/static/style.css
+++ b/app/static/style.css
@@ -43,7 +43,7 @@ article {
   box-sizing: border-box;
 }
 
-.flashes>.flash-radio:checked ~ li {
+.flashes>.flash-radio:checked + li {
   display: none;
 }
 .flashes>li .flash-close {
-- 
2.39.3


From 272386ad5bf8a58d36d4639bc3da76fe5df67133 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Wed, 26 Apr 2023 17:07:23 +0000
Subject: [PATCH 04/16] video metadata: prefer videoDetails over microformat
 for length

this short was registered as 61 seconds long, so our is_short detection
didn't catch it: b2cy9BvaaY4
---
 app/common/common.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/common/common.py b/app/common/common.py
index 8f8ac42..5a3639f 100644
--- a/app/common/common.py
+++ b/app/common/common.py
@@ -317,8 +317,9 @@ def video_metadata(metadata):
     meta2 = metadata.get('microformat',{}).get('playerMicroformatRenderer',{})
 
     # sometimes, we receive the notification so early that the length is not
-    # yet populated. Nothing we can do about it.
-    length = int(meta2.get('lengthSeconds',0)) or int(meta1.get('lengthSeconds',0)) or None
+    # yet populated. Nothing we can do about it. meta1 and meta2 use a
+    # different rounding strategy, meta2 is sometimes (incorrectly) 1s longer.
+    length = int(meta1.get('lengthSeconds',0)) or int(meta2.get('lengthSeconds',0)) or None
 
     scheduled_time = metadata.get('playabilityStatus',{}) \
         .get('liveStreamability',{}).get('liveStreamabilityRenderer',{}) \
-- 
2.39.3


From 7820b9fcc883a70b9b85a74e0bcb7b4dd14535ae Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Wed, 26 Apr 2023 17:19:02 +0000
Subject: [PATCH 05/16] [DATABASE CHANGE: Migration below] allow setting user
 settings from profile page

CREATE TABLE IF NOT EXISTS user_settings(
       user_id INTEGER,
       setting TEXT NOT NULL,
       value TEXT NOT NULL,
       PRIMARY KEY(user_id, setting),
       FOREIGN KEY(user_id) REFERENCES users(id));
---
 app/common/user.py                 | 22 +++++++++++++++++++++-
 app/templates/account_mgmt.html.j2 | 10 ++++++++++
 config/setup.sql                   |  6 ++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/app/common/user.py b/app/common/user.py
index 7b62e7c..d935bf0 100644
--- a/app/common/user.py
+++ b/app/common/user.py
@@ -2,6 +2,7 @@ from werkzeug.security import generate_password_hash, check_password_hash
 from .common import cf
 import sqlite3
 import secrets
+import json
 from flask_login import LoginManager, UserMixin, login_user, logout_user, login_required, current_user
 from flask import Blueprint, flash, redirect, render_template, url_for, request
 
@@ -107,6 +108,16 @@ def init_login(app):
     def account_manager():
         with sqlite3.connect(cf['global']['database']) as conn:
             c = conn.cursor()
+            c.execute("""
+                SELECT setting, value
+                  FROM user_settings
+                 WHERE user_id = ?
+            """, (current_user.id,))
+            result = c.fetchall()
+            settings = {
+                setting: json.loads(value)
+                for setting, value in result
+            }
             c.execute("""
                 SELECT token
                   FROM user_tokens
@@ -117,7 +128,7 @@ def init_login(app):
                 (login_token,) = result
             else:
                 login_token = ""
-            return render_template('account_mgmt.html.j2', login_token=login_token, random_pwd=secrets.token_hex(16))
+            return render_template('account_mgmt.html.j2', settings=settings, login_token=login_token, random_pwd=secrets.token_hex(16))
 
     @usermgmt.route('/manage/account', methods=['POST'])
     @login_required
@@ -139,6 +150,15 @@ def init_login(app):
                     VALUES (?, ?)
                 """, (current_user.id, new_token))
                 flash('new token generated.', 'info')
+        elif action == 'chset':
+            with sqlite3.connect(cf['global']['database']) as conn:
+                noshorts = request.form.get('noshorts') == 'yes'
+                c = conn.cursor()
+                c.execute("""
+                    INSERT OR REPLACE INTO user_settings (user_id, setting, value)
+                    VALUES (?, ?, ?)
+                """, (current_user.id, "noshorts", json.dumps(noshorts)))
+                flash('settings saved.', 'info')
         elif action == 'addusr':
             if not current_user.admin:
                 return "only admins may do that!", 403
diff --git a/app/templates/account_mgmt.html.j2 b/app/templates/account_mgmt.html.j2
index dce7455..db8928e 100644
--- a/app/templates/account_mgmt.html.j2
+++ b/app/templates/account_mgmt.html.j2
@@ -21,6 +21,16 @@
 		<button name=action value="chtok">Generate New</button>
 	</fieldset>
 	</form>
+
+	<h1>Site settings</h1>
+	<form method=POST>
+	<fieldset><legend>Subscription Feed</legend>
+		<label>Hide shorts:<input {{ 'checked' if settings.noshorts }} name=noshorts value=yes type=checkbox></label><br>
+		<button name=action value="chset">Update</button>
+		<!-- {{ settings|tojson }} -->
+	</fieldset>
+	</form>
+
 	{% if current_user.admin %}
 	<h1>Administration</h1>
 	<form method=POST>
diff --git a/config/setup.sql b/config/setup.sql
index 1f76da4..c94469a 100644
--- a/config/setup.sql
+++ b/config/setup.sql
@@ -76,3 +76,9 @@ CREATE TABLE IF NOT EXISTS users(
 CREATE TABLE IF NOT EXISTS user_tokens( -- stores revocable url tokens for feeds.
 	user_id INTEGER PRIMARY KEY NOT NULL,
 	token TEXT NOT NULL);
+CREATE TABLE IF NOT EXISTS user_settings( -- stores per-user settings as a vertical table.
+	user_id INTEGER,
+	setting TEXT NOT NULL,
+	value TEXT NOT NULL,
+	PRIMARY KEY(user_id, setting),
+	FOREIGN KEY(user_id) REFERENCES users(id));
-- 
2.39.3


From 3730d4e8241cc393a8c4b1d7fb8dd47d3a443def Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Wed, 26 Apr 2023 17:12:21 +0000
Subject: [PATCH 06/16] subscription feed: filter shorts if the user enabled
 the 'noshorts' setting

videos are displayed iff either noshorts config is false or not a shorts
video, but pinning overrides hiding. if shorts are shown, they are
marked 'shorts' instead of the length.
---
 app/youtube/__init__.py             | 21 ++++++++++++++++++---
 app/youtube/templates/index.html.j2 |  2 +-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/app/youtube/__init__.py b/app/youtube/__init__.py
index d0f187c..6acff52 100644
--- a/app/youtube/__init__.py
+++ b/app/youtube/__init__.py
@@ -33,8 +33,21 @@ def feed():
     page = request.args.get('page', 0, type=int)
     with sqlite3.connect(cf['global']['database']) as conn:
         c = conn.cursor()
+
+        settings = {} # fallback for guest user
+        if current_user.is_authenticated:
+            c.execute("""
+                SELECT setting, value
+                  FROM user_settings
+                 WHERE user_id = ?
+            """, (current_user.id,))
+            settings = {
+                setting: json.loads(value)
+                for setting, value in c.fetchall()
+            }
+
         c.execute("""
-	   SELECT videos.id, channel_id, name, title, length, livestream, premiere, published, playlist_videos.playlist_id, display
+       SELECT videos.id, channel_id, name, title, length, livestream, premiere, shorts, published, playlist_videos.playlist_id, display
 	     FROM videos
 	     JOIN channels ON videos.channel_id = channels.id
     LEFT JOIN playlist_videos ON (videos.id = playlist_videos.video_id)
@@ -43,9 +56,10 @@ def feed():
                OR playlist_videos.playlist_id IN (SELECT channel_id FROM subscriptions WHERE user=? AND type = 'playlist')
 	           OR flags.display = 'pinned')
 	          AND flags.display IS NOT 'hidden'
+              AND (flags.display = 'pinned' OR not ? or not shorts)
 	 ORDER BY (display = 'pinned') DESC, crawled DESC
 	    LIMIT 36
-	   OFFSET 36*?""", (token, token, token, page))
+	   OFFSET 36*?""", (token, token, token, settings.get('noshorts', False), page))
         rows = [{
             'video_id': video_id,
             'channel_id': channel_id,
@@ -56,10 +70,11 @@ def feed():
             'premiere': premiere and (# only if it hasn't yet premiered:
                 datetime.strptime(published+'+0000', "%Y-%m-%d %H:%M:%S%z")>datetime.now(tz=timezone.utc)
             ),
+            'shorts': shorts,
             'published': published,
             'playlist': playlist,
             'pinned': display == 'pinned',
-        } for (video_id, channel_id, author, title, length, livestream, premiere, published, playlist, display) in c.fetchall()]
+        } for (video_id, channel_id, author, title, length, livestream, premiere, shorts, published, playlist, display) in c.fetchall()]
     return render_template('index.html.j2', rows=rows, page=page)
 
 @frontend.route('/watch')
diff --git a/app/youtube/templates/index.html.j2 b/app/youtube/templates/index.html.j2
index fbcb3a6..9347427 100644
--- a/app/youtube/templates/index.html.j2
+++ b/app/youtube/templates/index.html.j2
@@ -7,7 +7,7 @@
 {{ super() }}
 <div class="cards">
 {% for row in rows %}
-	{% set badge = 'LIVE' if row.livestream else 'SOON' if row.premiere else row.length|format_time %}
+	{% set badge = 'shorts' if row.shorts else 'LIVE' if row.livestream else 'SOON' if row.premiere else row.length|format_time %}
 	{% call macros.card(row.video_id, row.title, row.published|format_date, row.pinned, badge=badge) %}
 		{{ macros.infobar_subscriptions(row.video_id, row.channel_id, row.author) }}
 	{% endcall %}
-- 
2.39.3


From 6d74ea6802998f0db0075b1778f830d49be019e8 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 12:44:11 +0000
Subject: [PATCH 07/16] use resolve_url endpoint for channel canonicalisation

one less separate place where we call into youtube's frontend apis

we now support:
  - /channel/ucid
  - /c/vanity
  - /user/username
  - /@handle
  - /brandname

according to https://support.google.com/youtube/answer/6180214?hl=en
vanity urls and usernames are legacy, brand urls aren't documented at
all.
---
 app/browse/__init__.py | 15 ++++++++-------
 app/browse/lib.py      | 26 ++++++++------------------
 2 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/app/browse/__init__.py b/app/browse/__init__.py
index 6a6b1aa..4673dd6 100644
--- a/app/browse/__init__.py
+++ b/app/browse/__init__.py
@@ -118,6 +118,7 @@ def channel(channel_id, subpage="videos"):
         is_subscribed=is_subscribed,
         continuation=continuation)
 
+@frontend.route('/<user>/<subpage>')
 @frontend.route('/user/<user>/')
 @frontend.route('/user/<user>/<subpage>')
 @frontend.route('/c/<user>/')
@@ -127,15 +128,17 @@ def channel_redirect(user, subpage=None):
     The browse_ajax 'API' needs the UCID.
     """
 
-    typ = request.path.split("/")[1] # 'c' or 'user'
-
     # inverse of the test in /channel/:
     if re.match(r"(UC[A-Za-z0-9_-]{22})", user):
         return redirect(url_for('.channel', channel_id=user))
 
-    channel_id = canonicalize_channel(user, typ)
+    if subpage not in (None, "home", "videos", "shorts", "streams", "playlists", "community", "channels", "about"):
+        raise NotFound("not a valid channel subpage")
+
+    channel_id = canonicalize_channel(request.path)
     if not channel_id:
-        raise NotFound("channel appears to not exist")
+        raise NotFound("channel does not exist")
+
     return redirect(
         url_for('.channel', channel_id=channel_id, subpage=subpage), 308
     )
@@ -177,9 +180,7 @@ def plain_user_or_video(something):
         # prevent a lot of false-positives (and reduce youtube api calls)
         raise NotFound
 
-    # possible channel names: need to distinguish /name from /@name
-    typ = "c" if something[0] != "@" else ""
-    channel_id = canonicalize_channel(something, typ)
+    channel_id = canonicalize_channel(something) # /vanity or /@handle
     if channel_id:
         return redirect(url_for('.channel', channel_id=channel_id))
     elif re.match(r"^[-_0-9A-Za-z]{11}$", something): # looks like a video id
diff --git a/app/browse/lib.py b/app/browse/lib.py
index 5d46b05..a4becf7 100644
--- a/app/browse/lib.py
+++ b/app/browse/lib.py
@@ -12,7 +12,7 @@ def fetch_ajax(endpoint, **kwargs):
     today = datetime.now(timezone.utc).strftime("%Y%m%d")
 
     # TODO: this is not cached any more! -> https://github.com/reclosedev/requests-cache/issues/154
-    # Note: this 'innertube' API key exists since at least 2015: https://stackoverflow.com/q/33511165
+    # TODO: replace host with youtubei.googleapis.com (used by android)?
     r = requests.post(f"https://www.youtube.com/youtubei/v1/{endpoint}?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={
         **kwargs,
         'context': {'client': {
@@ -28,24 +28,14 @@ def fetch_ajax(endpoint, **kwargs):
 
     return r.json()
 
-def canonicalize_channel(name, typ="c"):
-    if re.fullmatch(r"(UC[A-Za-z0-9_-]{22})", name):
-        return name
+def canonicalize_channel(path):
+    if re.fullmatch(r"(UC[A-Za-z0-9_-]{22})", path):
+        return path
 
-    # get UCID of /c/ (vanity URLs):
-    today = datetime.now(timezone.utc).strftime("%Y%m%d")
-    typ += "/" if typ != "@" else ""
-    r = requests.get(f'https://www.youtube.com/{typ}{name}/about?pbj=1&hl=en_US', headers={
-        'x-youtube-client-name': '1',
-        'x-youtube-client-version': f'2.{today}.01.01', # see fetch_searchresults()
-    })
-    try:
-        return r.json()[1]['response']['metadata']['channelMetadataRenderer']['rssUrl'].split("=")[1]
-    except:
-        pass
-
-    # unable to extract:
-    return None
+    # Note: for /watch, append query string, then return .endpoint.watchEndpoint.videoId
+    resolved = fetch_ajax("navigation/resolve_url", url=f"https://www.youtube.com/{path}")
+    channel_id = resolved.get('endpoint',{}).get('browseEndpoint',{}).get('browseId')
+    return channel_id
 
 def find_and_parse_error(result):
     error_obj = (
-- 
2.39.3


From df351c6694ccb0b3874ae424ba5988699c68cea5 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 13:04:46 +0000
Subject: [PATCH 08/16] remove vertical white space after closing all flashes

---
 app/static/style.css | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/static/style.css b/app/static/style.css
index ef73d0d..1586add 100644
--- a/app/static/style.css
+++ b/app/static/style.css
@@ -29,13 +29,14 @@ article {
 }
 
 .flashes {
-  margin: 0 auto;
-  padding: .5em 1.5em;
+  margin: 1.5em auto 0;
+  padding: 0;
   box-sizing: border-box;
   max-width: 1200px; /* same as .articles */
 }
 
 .flashes>li {
+  margin: .75em 0;
   display: block;
   border-radius: 5px;
   width: 100%;
-- 
2.39.3


From d99f64c52b1f5d786b05d76ab01b221c21e0ae41 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 13:19:21 +0000
Subject: [PATCH 09/16] support attribution_link redirects

---
 app/browse/lib.py       | 2 +-
 app/youtube/__init__.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/app/browse/lib.py b/app/browse/lib.py
index a4becf7..e4e36b6 100644
--- a/app/browse/lib.py
+++ b/app/browse/lib.py
@@ -34,7 +34,7 @@ def canonicalize_channel(path):
 
     # Note: for /watch, append query string, then return .endpoint.watchEndpoint.videoId
     resolved = fetch_ajax("navigation/resolve_url", url=f"https://www.youtube.com/{path}")
-    channel_id = resolved.get('endpoint',{}).get('browseEndpoint',{}).get('browseId')
+    channel_id = (resolved or {}).get('endpoint',{}).get('browseEndpoint',{}).get('browseId')
     return channel_id
 
 def find_and_parse_error(result):
diff --git a/app/youtube/__init__.py b/app/youtube/__init__.py
index 6acff52..48f6a5f 100644
--- a/app/youtube/__init__.py
+++ b/app/youtube/__init__.py
@@ -236,6 +236,12 @@ def plain_user_or_video(something):
         # XXX: something == 'thethoughtemporium' -> 404s
         raise NotFound("Note: some usernames not recognized; try searching it")
 
+@frontend.route('/attribution_link', strict_slashes=False)
+def attribution_link():
+    # /attribution_link?a=anything&u=/channel/UCZYTClx2T1of7BRZ86-8fow
+    # /attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare
+    return redirect(request.args.get('u') or '/')
+
 @frontend.route('/c/<channel_id>/<subpage>')
 @frontend.route('/c/<channel_id>/')
 @frontend.route('/user/<channel_id>/<subpage>')
-- 
2.39.3


From 4d6ac61835207c3922d85870d9ce58a8c7051d5c Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 13:59:58 +0000
Subject: [PATCH 10/16] split common.innertube into youtube.cards and
 browse.innertube

a few things that were used in both places (G, mkthumbs,
log_unknown_card) now live in common.common.
---
 app/browse/__init__.py              |   3 +-
 app/{common => browse}/innertube.py | 155 +---------------------------
 app/browse/lib.py                   |   2 +-
 app/common/common.py                |  38 ++++++-
 app/youtube/cards.py                | 121 ++++++++++++++++++++++
 app/youtube/lib.py                  |   3 +-
 6 files changed, 161 insertions(+), 161 deletions(-)
 rename app/{common => browse}/innertube.py (70%)
 create mode 100644 app/youtube/cards.py

diff --git a/app/browse/__init__.py b/app/browse/__init__.py
index 4673dd6..af06fee 100644
--- a/app/browse/__init__.py
+++ b/app/browse/__init__.py
@@ -1,11 +1,12 @@
+import re
 import requests
 from flask import Blueprint, render_template, request, flash, g, url_for, redirect
 from flask_login import current_user
 from werkzeug.exceptions import BadRequest, NotFound
 
 from ..common.common import *
-from ..common.innertube import *
 from .lib import *
+from .innertube import prepare_searchresults, prepare_channel, prepare_playlist
 from .protobuf import make_sp, make_channel_params, make_playlist_params, Filters
 
 frontend = Blueprint('browse', __name__,
diff --git a/app/common/innertube.py b/app/browse/innertube.py
similarity index 70%
rename from app/common/innertube.py
rename to app/browse/innertube.py
index 49d53ae..b47b6a6 100644
--- a/app/common/innertube.py
+++ b/app/browse/innertube.py
@@ -1,27 +1,7 @@
 # functions that deal with parsing data from youtube's internal API ("innertube")
 
-from urllib.parse import parse_qs, urlparse
-import re
+from ..common.common import mkthumbs, log_unknown_card, G
 
-class G:
-    """
-    null-coalescing version of dict.get() that also works on lists.
-
-    the | operator is overloaded to achieve similar looking code to jq(1) filters.
-    the first found key is used: dict(foo=1)|G('bar','foo') returns 1.
-    """
-    def __init__(self, *keys):
-        self.keys = keys
-    def __ror__(self, other):
-        for key in self.keys:
-            try:    return other[key]
-            except: continue
-        return None
-    class _Text:
-        """ parses youtube's .runs[].text and .simpleText variants """
-        def __ror__(self, other): # Note: only returning runs[0], not concat'ing all!
-            return other|G('simpleText') or other|G('runs')|G(0)|G('text')
-    text = _Text()
 class Select:
     """ |Select('foo') returns the first foo in list, |Select(all='foo') returns all foos. """
     def __init__(self, key=None, *, all=None):
@@ -66,14 +46,6 @@ def prepare_searchresults(yt_results):
 
     return items, extra, more
 
-def prepare_infocards(metadata):
-    cards = metadata.get('cards',{}).get('cardCollectionRenderer',{}).get('cards',[])
-    return list(filter(None, map(parse_infocard, cards)))
-
-def prepare_endcards(metadata):
-    endsc = metadata.get('endscreen',{}).get('endscreenRenderer',{}).get('elements',[])
-    return list(filter(None, map(parse_endcard, endsc)))
-
 def prepare_channel(response, channel_id, channel_name):
     meta1 = response|G('metadata')|G('channelMetadataRenderer')
     meta2 = response|G('microformat')|G('microformatDataRenderer')
@@ -132,27 +104,6 @@ def prepare_playlist(result):
 
     return title, author, channel_id, list(filter(None, map(parse_playlist, unparsed))), more
 
-def mkthumbs(thumbs):
-    output = {str(e['height']): e['url'] for e in thumbs}
-    largest=next(iter(sorted(output.keys(),reverse=True,key=int)),None)
-    return {**output, 'largest': largest}
-
-def clean_url(url):
-    # externals URLs are redirected through youtube.com/redirect, but we
-    # may encounter internal URLs, too
-    return parse_qs(urlparse(url).query).get('q',[url])[0]
-
-def toInt(s, fallback=0):
-    if s is None:
-        return fallback
-    try:
-        return int(''.join(filter(str.isdigit, s)))
-    except ValueError:
-        return fallback
-
-# Remove left-/rightmost word from string:
-delL = lambda s: s.partition(' ')[2]
-
 def age(s):
     if s is None: # missing from autogen'd music, some livestreams
         return None
@@ -167,16 +118,6 @@ def age(s):
 
     return f"{value}{suffix}"
 
-def log_unknown_card(data):
-    import json
-    try:
-        from flask import request
-        source = request.url
-    except: source = "unknown"
-    with open("/tmp/innertube.err", "a", encoding="utf-8", errors="backslashreplace") as f:
-        f.write(f"\n/***** {source} *****/\n")
-        json.dump(data, f, indent=2)
-
 def parse_result_items(items):
     # TODO: use .get() for most non-essential attributes
     """
@@ -264,100 +205,6 @@ def parse_result_items(items):
             log_unknown_card(item)
     return results, extras
 
-def parse_infocard(card):
-    """
-    parses a single infocard into a format that's easier to handle.
-    """
-    card = card['cardRenderer']
-    if not 'content' in card:
-        return None  # probably the "View corrections" card, ignore.
-    ctype = list(card['content'].keys())[0]
-    content = card['content'][ctype]
-    if ctype == "pollRenderer":
-        return {'type': "POLL", 'content': {
-            'question': content['question']['simpleText'],
-            'answers': [(a['text']['simpleText'],a['numVotes']) \
-                for a in content['choices']],
-        }}
-    elif ctype == "videoInfoCardContentRenderer":
-        is_live = content.get('badge',{}).get('liveBadgeRenderer') is not None
-        return {'type': "VIDEO", 'content': {
-            'video_id': content['action']['watchEndpoint']['videoId'],
-            'title': content['videoTitle']['simpleText'],
-            'author': delL(content['channelName']['simpleText']),
-            'length': content.get('lengthString',{}).get('simpleText') \
-                if not is_live else "LIVE", # "23:03"
-            'views': toInt(content.get('viewCountText',{}).get('simpleText')),
-                # XXX: views sometimes "Starts: July 31, 2020 at 1:30 PM"
-        }}
-    elif ctype == "playlistInfoCardContentRenderer":
-        return {'type': "PLAYLIST", 'content': {
-            'playlist_id': content['action']['watchEndpoint']['playlistId'],
-            'video_id': content['action']['watchEndpoint']['videoId'],
-            'title': content['playlistTitle']['simpleText'],
-            'author': delL(content['channelName']['simpleText']),
-            'n_videos': toInt(content['playlistVideoCount']['simpleText']),
-        }}
-    elif ctype == "simpleCardContentRenderer" and \
-            'urlEndpoint' in content['command']:
-        return {'type': "WEBSITE", 'content': {
-            'url': clean_url(content['command']['urlEndpoint']['url']),
-            'domain': content['displayDomain']['simpleText'],
-            'title': content['title']['simpleText'],
-            # XXX: no thumbnails for infocards
-        }}
-    elif ctype == "collaboratorInfoCardContentRenderer":
-        return {'type': "CHANNEL", 'content': {
-            'channel_id': content['endpoint']['browseEndpoint']['browseId'],
-            'title': content['channelName']['simpleText'],
-            'icons': mkthumbs(content['channelAvatar']['thumbnails']),
-            'subscribers': content.get('subscriberCountText',{}).get('simpleText',''), # "545K subscribers"
-        }}
-    else:
-        log_unknown_card(card)
-        return None
-
-def parse_endcard(card):
-    """
-    parses a single endcard into a format that's easier to handle.
-    """
-    card = card.get('endscreenElementRenderer', card) #only sometimes nested
-    ctype = card['style']
-    if ctype == "CHANNEL":
-        return {'type': ctype, 'content': {
-            'channel_id': card['endpoint']['browseEndpoint']['browseId'],
-            'title': card['title']|G.text,
-            'icons': mkthumbs(card['image']['thumbnails']),
-        }}
-    elif ctype == "VIDEO":
-        if not 'endpoint' in card: return None # title == "This video is unavailable."
-        return {'type': ctype, 'content': {
-            'video_id': card['endpoint']['watchEndpoint']['videoId'],
-            'title': card['title']|G.text,
-            'length': card|G('videoDuration')|G.text,  # '12:21'
-            'views': toInt(card['metadata']|G.text),
-            # XXX: no channel name
-        }}
-    elif ctype == "PLAYLIST":
-        return {'type': ctype, 'content': {
-            'playlist_id': card['endpoint']['watchEndpoint']['playlistId'],
-            'video_id': card['endpoint']['watchEndpoint']['videoId'],
-            'title': card['title']|G.text,
-            'author': delL(card['metadata']|G.text),
-            'n_videos': toInt(card['playlistLength']|G.text),
-        }}
-    elif ctype == "WEBSITE" or ctype == "CREATOR_MERCHANDISE":
-        url = clean_url(card['endpoint']['urlEndpoint']['url'])
-        return {'type': "WEBSITE", 'content': {
-            'url': url,
-            'domain': urlparse(url).netloc,
-            'title': card['title']|G.text,
-            'icons': mkthumbs(card['image']['thumbnails']),
-        }}
-    else:
-        log_unknown_card(card)
-        return None
-
 def parse_channel_items(items, channel_id, author):
     result = []
     extra = []
diff --git a/app/browse/lib.py b/app/browse/lib.py
index e4e36b6..c389d38 100644
--- a/app/browse/lib.py
+++ b/app/browse/lib.py
@@ -2,7 +2,7 @@ import re
 import requests
 from datetime import datetime, timezone
 
-from ..common.innertube import G
+from ..common.common import G
 
 def fetch_ajax(endpoint, **kwargs):
     """
diff --git a/app/common/common.py b/app/common/common.py
index 5a3639f..1045174 100644
--- a/app/common/common.py
+++ b/app/common/common.py
@@ -50,6 +50,26 @@ class _NSASession(OriginalSession):
         return response
 requests.Session = requests.sessions.Session = _NSASession
 
+class G:
+    """
+    null-coalescing version of dict.get() that also works on lists.
+
+    the | operator is overloaded to achieve similar looking code to jq(1) filters.
+    the first found key is used: dict(foo=1)|G('bar','foo') returns 1.
+    """
+    def __init__(self, *keys):
+        self.keys = keys
+    def __ror__(self, other):
+        for key in self.keys:
+            try:    return other[key]
+            except: continue
+        return None
+    class _Text:
+        """ parses youtube's .runs[].text and .simpleText variants """
+        def __ror__(self, other): # Note: only returning runs[0], not concat'ing all!
+            return other|G('simpleText') or other|G('runs')|G(0)|G('text')
+    text = _Text()
+
 def fetch_xml(feed_type, feed_id):
     # TODO: handle requests.exceptions.ConnectionError
     r = requests.get("https://www.youtube.com/feeds/videos.xml", {
@@ -367,6 +387,11 @@ def video_metadata(metadata):
         'shorts': is_short,
     }
 
+def mkthumbs(thumbs):
+    output = {str(e['height']): e['url'] for e in thumbs}
+    largest=next(iter(sorted(output.keys(),reverse=True,key=int)),None)
+    return {**output, 'largest': largest}
+
 def store_video_metadata(video_id):
     # check if we know about it, and if not, fetch and store video metadata
     with sqlite3.connect(cf['global']['database']) as conn:
@@ -467,7 +492,12 @@ def flask_logger(msg, level="warning"):
     except:
         pass
 
-def pp(*args):
-    from pprint import pprint
-    import sys, codecs
-    pprint(args, stream=codecs.getwriter("utf-8")(sys.stderr.buffer))
+def log_unknown_card(data):
+    import json
+    try:
+        from flask import request
+        source = request.url
+    except: source = "unknown"
+    with open("/tmp/innertube.err", "a", encoding="utf-8", errors="backslashreplace") as f:
+        f.write(f"\n/***** {source} *****/\n")
+        json.dump(data, f, indent=2)
diff --git a/app/youtube/cards.py b/app/youtube/cards.py
new file mode 100644
index 0000000..2377a8f
--- /dev/null
+++ b/app/youtube/cards.py
@@ -0,0 +1,121 @@
+from urllib.parse import parse_qs, urlparse
+
+from ..common.common import mkthumbs, log_unknown_card, G # TODO: temporary, will move to somewhere else in common
+
+def prepare_infocards(metadata):
+    cards = metadata.get('cards',{}).get('cardCollectionRenderer',{}).get('cards',[])
+    return list(filter(None, map(parse_infocard, cards)))
+
+def prepare_endcards(metadata):
+    endsc = metadata.get('endscreen',{}).get('endscreenRenderer',{}).get('elements',[])
+    return list(filter(None, map(parse_endcard, endsc)))
+
+def clean_url(url):
+    # external URLs are redirected through youtube.com/redirect (target in
+    # the 'q' parameter), but we may encounter internal URLs, too
+    return parse_qs(urlparse(url).query).get('q',[url])[0]
+
+def toInt(s, fallback=0):
+    if s is None:
+        return fallback
+    try:
+        return int(''.join(filter(str.isdigit, s)))
+    except ValueError:
+        return fallback
+
+# Remove leftmost word from string:
+delL = lambda s: s.partition(' ')[2]
+
+def parse_infocard(card):
+    """
+    parses a single infocard into a format that's easier to handle.
+    """
+    card = card['cardRenderer']
+    if not 'content' in card:
+        return None  # probably the "View corrections" card, ignore.
+    ctype = list(card['content'].keys())[0]
+    content = card['content'][ctype]
+    if ctype == "pollRenderer":
+        return {'type': "POLL", 'content': {
+            'question': content['question']['simpleText'],
+            'answers': [(a['text']['simpleText'],a['numVotes']) \
+                for a in content['choices']],
+        }}
+    elif ctype == "videoInfoCardContentRenderer":
+        is_live = content.get('badge',{}).get('liveBadgeRenderer') is not None
+        return {'type': "VIDEO", 'content': {
+            'video_id': content['action']['watchEndpoint']['videoId'],
+            'title': content['videoTitle']['simpleText'],
+            'author': delL(content['channelName']['simpleText']),
+            'length': content.get('lengthString',{}).get('simpleText') \
+                if not is_live else "LIVE", # "23:03"
+            'views': toInt(content.get('viewCountText',{}).get('simpleText')),
+                # XXX: views sometimes "Starts: July 31, 2020 at 1:30 PM"
+        }}
+    elif ctype == "playlistInfoCardContentRenderer":
+        return {'type': "PLAYLIST", 'content': {
+            'playlist_id': content['action']['watchEndpoint']['playlistId'],
+            'video_id': content['action']['watchEndpoint']['videoId'],
+            'title': content['playlistTitle']['simpleText'],
+            'author': delL(content['channelName']['simpleText']),
+            'n_videos': toInt(content['playlistVideoCount']['simpleText']),
+        }}
+    elif ctype == "simpleCardContentRenderer" and \
+            'urlEndpoint' in content['command']:
+        return {'type': "WEBSITE", 'content': {
+            'url': clean_url(content['command']['urlEndpoint']['url']),
+            'domain': content['displayDomain']['simpleText'],
+            'title': content['title']['simpleText'],
+            # XXX: no thumbnails for infocards
+        }}
+    elif ctype == "collaboratorInfoCardContentRenderer":
+        return {'type': "CHANNEL", 'content': {
+            'channel_id': content['endpoint']['browseEndpoint']['browseId'],
+            'title': content['channelName']['simpleText'],
+            'icons': mkthumbs(content['channelAvatar']['thumbnails']),
+            'subscribers': content.get('subscriberCountText',{}).get('simpleText',''), # "545K subscribers"
+        }}
+    else:
+        log_unknown_card(card)
+        return None
+
+def parse_endcard(card):
+    """
+    parses a single endcard into a format that's easier to handle.
+    """
+    card = card.get('endscreenElementRenderer', card) #only sometimes nested
+    ctype = card['style']
+    if ctype == "CHANNEL":
+        return {'type': ctype, 'content': {
+            'channel_id': card['endpoint']['browseEndpoint']['browseId'],
+            'title': card['title']|G.text,
+            'icons': mkthumbs(card['image']['thumbnails']),
+        }}
+    elif ctype == "VIDEO":
+        if not 'endpoint' in card: return None # title == "This video is unavailable."
+        return {'type': ctype, 'content': {
+            'video_id': card['endpoint']['watchEndpoint']['videoId'],
+            'title': card['title']|G.text,
+            'length': card|G('videoDuration')|G.text,  # '12:21'
+            'views': toInt(card['metadata']|G.text),
+            # XXX: no channel name
+        }}
+    elif ctype == "PLAYLIST":
+        return {'type': ctype, 'content': {
+            'playlist_id': card['endpoint']['watchEndpoint']['playlistId'],
+            'video_id': card['endpoint']['watchEndpoint']['videoId'],
+            'title': card['title']|G.text,
+            'author': delL(card['metadata']|G.text),
+            'n_videos': toInt(card['playlistLength']|G.text),
+        }}
+    elif ctype == "WEBSITE" or ctype == "CREATOR_MERCHANDISE":
+        url = clean_url(card['endpoint']['urlEndpoint']['url'])
+        return {'type': "WEBSITE", 'content': {
+            'url': url,
+            'domain': urlparse(url).netloc,
+            'title': card['title']|G.text,
+            'icons': mkthumbs(card['image']['thumbnails']),
+        }}
+    else:
+        log_unknown_card(card)
+        return None
diff --git a/app/youtube/lib.py b/app/youtube/lib.py
index 9d42320..e9fe869 100644
--- a/app/youtube/lib.py
+++ b/app/youtube/lib.py
@@ -2,8 +2,9 @@ import re
 import requests
 from urllib.parse import urlparse
 
+from .cards import prepare_infocards, prepare_endcards
 from ..common.common import video_metadata
-from ..common.innertube import prepare_infocards, prepare_endcards, G
+from ..common.common import G
 
 def prepare_metadata(metadata):
     meta = metadata['videoDetails']
-- 
2.39.3


From 887dc64d607b369be5a2e213fe28be266f15f3dd Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 14:33:54 +0000
Subject: [PATCH 11/16] remove invidious redirection from ?show=raw

not very useful (not understood by <video> tags), can't easily be
modified to work with redirect.invidious.io
---
 app/youtube/__init__.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/app/youtube/__init__.py b/app/youtube/__init__.py
index 48f6a5f..8b07dd8 100644
--- a/app/youtube/__init__.py
+++ b/app/youtube/__init__.py
@@ -156,9 +156,7 @@ def watch():
     if show == "raw":
         if error:
             msg = errdetails if error=='player' else f"{error.upper()}: {errdetails}"
-            return f"{msg}\n\nRedirecting to Invidious.", 502, {
-                'Refresh': f'2; URL={invidious_url}&raw=1',
-                **plaintextheaders}
+            return msg, 400, plaintextheaders # TODO: nicer
         return redirect(video_url, code=307)
     elif show == "json":
         if error and not metadata:
-- 
2.39.3


From a6b9bcf8547c25cf3807e7db6b2c3afc9b9cf045 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 14:40:07 +0000
Subject: [PATCH 12/16] use redirect.invidious.io instead of hard-coding a
 fallback-instance.

note that some instances provide a redirect feature (example below); it would
be nice to have this on redirect.invidious.io or api.invidious.io, but neither offers it.
  https://invidious.snopyta.org/redirect?referer=%2Fwatch%3Fv%3D{video_id}

invidious domains are removed from the reddit domain filter completely (who even used that?).
---
 app/reddit/lib.py       | 4 ++--
 app/youtube/__init__.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/app/reddit/lib.py b/app/reddit/lib.py
index f1b77d2..70c892e 100644
--- a/app/reddit/lib.py
+++ b/app/reddit/lib.py
@@ -47,11 +47,11 @@ def parse_reddit_videos(data):
             reverse=True)
     for entry in entries:
         e = entry['data']
-        if e['domain'] not in ['youtube.com', 'youtu.be', 'invidio.us', 'invidious.snopyta.org']:
+        if e['domain'] not in ['youtube.com', 'youtu.be', 'youtube-nocookie.com']:
             continue
         try:
             # Note: youtube.com/<video_id> is not valid (404s), but seen in the wild.
-            match = re.match(r'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&amp;)?v=|youtu.be/|youtube.com/(?:embed|shorts|live)/|youtube.com/)([-_0-9A-Za-z]+)(?:[?&#]t=([0-9hms:]+))?', e['url'])
+            match = re.match(r'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&amp;)?v=|youtu.be/|youtube(?:-nocookie)?.com/(?:embed|shorts|live)/|youtube.com/)([-_0-9A-Za-z]+)(?:[?&#]t=([0-9hms:]+))?', e['url'])
             video_id = match.group(1)
             timestamp = match.group(2)
             maybe_length = re.match(r'.*[\[(](?:00:)?(\d\d?(?::\d\d){1,2})[\])]', e['title'])  # .* to match last occurence in line (probably terrible for performance?)
diff --git a/app/youtube/__init__.py b/app/youtube/__init__.py
index 8b07dd8..81f01e3 100644
--- a/app/youtube/__init__.py
+++ b/app/youtube/__init__.py
@@ -108,8 +108,6 @@ def watch():
         None: "", # proxy disabled globally
         True: "", # proxy is available to this user
     }[proxy_on]
-    extra = {'geolocked':'local=1', 'livestream':'raw=0'}.get(error,'')
-    invidious_url = f"https://invidious.snopyta.org/watch?v={video_id}&{extra}"
     errdetails = {
         'banned':     "Instance is being rate limited.",
         'malformed':  "Video ID is invalid.",
@@ -183,6 +181,8 @@ def watch():
         parsed = microformat_parser(metadata)
         return {'microformat': metadata.get('microformat'),'cards':metadata.get('cards'), '_':parsed}
     else:
+        extra = {'geolocked':'local=1', 'livestream':'raw=0'}.get(error,'')
+        invidious_url = f"https://redirect.invidious.io/watch?v={video_id}&{extra}"
         if error and not metadata: # e.g. malformed, private/deleted video, ...
             return render_template('video-error.html.j2', video_id=video_id,
                 video_error=error, errdetails=errdetails, invidious_url=invidious_url)
-- 
2.39.3


From edcacb013e360b942c6024fdb6d38cb904b0d5ce Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 16:01:02 +0000
Subject: [PATCH 13/16] load pinned/hidden state and button on search results
 and in playlists

---
 app/browse/__init__.py | 14 +++++---------
 app/common/common.py   |  9 +++++++++
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/app/browse/__init__.py b/app/browse/__init__.py
index af06fee..00da8ee 100644
--- a/app/browse/__init__.py
+++ b/app/browse/__init__.py
@@ -17,7 +17,7 @@ frontend = Blueprint('browse', __name__,
 @frontend.route('/results')
 @frontend.route('/search')
 def search():
-    #token = getattr(current_user, 'token', 'guest')
+    token = getattr(current_user, 'token', 'guest')
     q = request.args.get('q') or request.args.get('search_query')
     continuation = request.args.get('continuation')
 
@@ -38,6 +38,7 @@ def search():
         ))
 
         results, extras, continuation = prepare_searchresults(yt_results)
+        results = apply_video_flags(token, results)
 
         for extra in extras:
             flash(extra, 'info')
@@ -89,15 +90,8 @@ def channel(channel_id, subpage="videos"):
         flash("ajax returned nothing; displaying fallback results (15 newest)", "error")
         return fallback_route(channel_id, subpage)
 
-
     # set pin/hide stati of retrieved videos:
-    video_ids = [card['content']['video_id'] for card in rows]
-    pinned, hidden = fetch_video_flags(token, video_ids)
-    rows = sorted([
-        {'type':v['type'], 'content':{**v['content'], 'pinned': v['content']['video_id'] in pinned}}
-        for v in rows
-        if v['content']['video_id'] not in hidden
-    ], key=lambda v:v['content']['pinned'], reverse=True)
+    rows = apply_video_flags(token, rows)
 
     with sqlite3.connect(cf['global']['database']) as conn:
         c = conn.cursor()
@@ -146,6 +140,7 @@ def channel_redirect(user, subpage=None):
 
 @frontend.route('/playlist')
 def playlist():
+    token = getattr(current_user, 'token', 'guest')
     playlist_id = request.args.get('list')
     if not playlist_id:
         raise BadRequest("No playlist ID")
@@ -165,6 +160,7 @@ def playlist():
         return fallback_route()
 
     title, author, channel_id, rows, continuation = prepare_playlist(result)
+    rows = apply_video_flags(token, rows)
 
     return render_template('playlist.html.j2',
         title=title,
diff --git a/app/common/common.py b/app/common/common.py
index 1045174..9223e64 100644
--- a/app/common/common.py
+++ b/app/common/common.py
@@ -438,6 +438,15 @@ def fetch_video_flags(token, video_ids):
 
         return pinned, hidden
 
+def apply_video_flags(token, rows):
+    video_ids = [card['content']['video_id'] for card in rows if 'video_id' in card['content']]
+    pinned, hidden = fetch_video_flags(token, video_ids)
+    return sorted([
+        {'type':v['type'], 'content':{**v['content'], 'pinned': v['content']['video_id'] in pinned if 'video_id' in v['content'] else False}}
+        for v in rows
+        if 'video_id' not in v['content'] or v['content']['video_id'] not in hidden
+    ], key=lambda v:v['content']['pinned'], reverse=True)
+
 from werkzeug.exceptions import NotFound
 class NoFallbackException(NotFound): pass
 def fallback_route(*args, **kwargs): # TODO: worthy as a flask-extension?
-- 
2.39.3


From bdbe0b638fc257e9b0d1eeb72967f829180c1378 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 16:22:05 +0000
Subject: [PATCH 14/16] improve error view of browse.channel slightly

---
 app/browse/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/app/browse/__init__.py b/app/browse/__init__.py
index 00da8ee..586dcf9 100644
--- a/app/browse/__init__.py
+++ b/app/browse/__init__.py
@@ -78,7 +78,8 @@ def channel(channel_id, subpage="videos"):
         return fallback_route(channel_id, subpage)
 
     if error:
-        return error, 400 # todo: ugly
+        # mostly 'This channel does not exist' or 'This account has been terminated', hence 404
+        raise NotFound(error)
 
     # new seperated videos/livestreams/shorts don't return metadata
     xmlfeed = fetch_xml("channel_id", channel_id)
-- 
2.39.3


From e71aee2c2249c372192752d4a9580ffc6dbde039 Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 16:42:08 +0000
Subject: [PATCH 15/16] reddit: validate some parameters and use HTTPExceptions

---
 app/reddit/__init__.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/app/reddit/__init__.py b/app/reddit/__init__.py
index a430a96..5a3ff63 100644
--- a/app/reddit/__init__.py
+++ b/app/reddit/__init__.py
@@ -2,6 +2,7 @@ import re
 import sqlite3
 from flask_login import current_user, login_required
 from flask import Blueprint, render_template, request, redirect, flash, url_for
+from werkzeug.exceptions import BadRequest, BadGateway
 
 from ..common.common import *
 from .lib import *
@@ -17,8 +18,15 @@ def reddit(subreddit=None):
     token = getattr(current_user, 'token', 'guest')
     after = request.args.get('after')
 
-    sortorder = request.args.get('s', "hot") # TODO: verify!
-    timerange = request.args.get('t', None) # TODO: verify!
+    sortorder = request.args.get('s', "hot")
+    timerange = request.args.get('t', None)
+
+    if subreddit and not re.fullmatch(r"[-+_0-9A-Za-z]{2,21}", subreddit):
+        raise BadRequest("invalid subreddit")
+    if sortorder not in ("hot", "new", "rising", "controversial", "top"):
+        raise BadRequest("invalid sort order")
+    if timerange not in (None, "hour", "day", "week", "month", "year", "all"):
+        raise BadRequest("invalid top time range")
 
     all_subreddits = get_subreddits(token)
     subreddits = [subreddit] if subreddit else all_subreddits
@@ -30,7 +38,7 @@ def reddit(subreddit=None):
             videos = parse_reddit_videos(data)
             after = data['data']['after']
         except RedditException as e:
-            return f"error retrieving reddit data: <xmp>{e}</xmp>", 502 # TODO: better
+            raise BadGateway(f"error retrieving reddit data: {e}")
 
         # set pin/hide stati of retrieved videos
         video_ids = [v['video_id'] for v in videos]
-- 
2.39.3


From 2f824c438b24cb147081fd911930856a63c3b6ab Mon Sep 17 00:00:00 2001
From: girst <tobi@isticktoit.net>
Date: Sat, 29 Apr 2023 18:30:06 +0000
Subject: [PATCH 16/16] mark upcoming livestreams as 'SOON'

livestreams currently live and finished livestreams will show 'LIVE'.
the latter is a limitation of our collected data (at the time of
crawling, livestreams have no duration which we could show).
---
 app/youtube/__init__.py             | 9 ++++-----
 app/youtube/templates/index.html.j2 | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/app/youtube/__init__.py b/app/youtube/__init__.py
index 81f01e3..08c81ba 100644
--- a/app/youtube/__init__.py
+++ b/app/youtube/__init__.py
@@ -47,7 +47,7 @@ def feed():
             }
 
         c.execute("""
-       SELECT videos.id, channel_id, name, title, length, livestream, premiere, shorts, published, playlist_videos.playlist_id, display
+       SELECT videos.id, channel_id, name, title, length, livestream, premiere, shorts, published > datetime('now') as upcoming, published, playlist_videos.playlist_id, display
 	     FROM videos
 	     JOIN channels ON videos.channel_id = channels.id
     LEFT JOIN playlist_videos ON (videos.id = playlist_videos.video_id)
@@ -67,14 +67,13 @@ def feed():
             'title': title,
             'length': length,
             'livestream': livestream,
-            'premiere': premiere and (# only if it hasn't yet premiered:
-                datetime.strptime(published+'+0000', "%Y-%m-%d %H:%M:%S%z")>datetime.now(tz=timezone.utc)
-            ),
+            'premiere': premiere,
             'shorts': shorts,
+            'upcoming': upcoming,
             'published': published,
             'playlist': playlist,
             'pinned': display == 'pinned',
-        } for (video_id, channel_id, author, title, length, livestream, premiere, shorts, published, playlist, display) in c.fetchall()]
+        } for (video_id, channel_id, author, title, length, livestream, premiere, shorts, upcoming, published, playlist, display) in c.fetchall()]
     return render_template('index.html.j2', rows=rows, page=page)
 
 @frontend.route('/watch')
diff --git a/app/youtube/templates/index.html.j2 b/app/youtube/templates/index.html.j2
index 9347427..3d86f09 100644
--- a/app/youtube/templates/index.html.j2
+++ b/app/youtube/templates/index.html.j2
@@ -7,7 +7,7 @@
 {{ super() }}
 <div class="cards">
 {% for row in rows %}
-	{% set badge = 'shorts' if row.shorts else 'LIVE' if row.livestream else 'SOON' if row.premiere else row.length|format_time %}
+	{% set badge = 'shorts' if row.shorts else 'SOON' if row.upcoming else 'LIVE' if row.livestream else row.length|format_time %}
 	{% call macros.card(row.video_id, row.title, row.published|format_date, row.pinned, badge=badge) %}
 		{{ macros.infobar_subscriptions(row.video_id, row.channel_id, row.author) }}
 	{% endcall %}
-- 
2.39.3