]>
git.gir.st - subscriptionfeed.git/blob - app/common/common.py
10 import dateutil
. parser
11 from xml
. etree
import ElementTree
12 from configparser
import ConfigParser
13 from datetime
import datetime
, timezone
14 from urllib
. parse
import parse_qs
, urlparse
17 config_filename
= os
. environ
. get ( 'YT_CONFIG' , '/etc/yt/config.ini' )
18 cf
. read ( config_filename
)
19 if not 'global' in cf
: # todo: full config check
20 raise Exception ( "Configuration file not found or empty" )
22 # Note: currently expiring after 10 minutes. googlevideo-urls are valid for 5h59m, but this makes reddit very stale and premiere videos won't start. TODO: expire when video is livestream/premiere/etc
23 requests_cache
. install_cache ( backend
= 'memory' , expire_after
= 10 * 60 , allowable_codes
=( 200 ,))
25 # Note: this should only be required for the 'memory' backed cache.
26 # TODO: only run for long-running processes, i.e. the frontend
27 from threading
import Timer
29 requests_cache
. remove_expired_responses ()
30 t
= Timer ( sec
, purge_cache
, args
=( sec
,))
35 # for debugging purposes, monkey patch requests session to store each requests-request in a flask-request's g object (url and response). we can then use a flask error_handler to include the request data in the error log.
36 # since we also call config from outside the flask appcontext, it is wrapped in a try-catch block.
39 from requests
import Session
as OriginalSession
40 class _NSASession ( OriginalSession
):
41 def request ( self
, method
, url
, params
= None , data
= None , ** kwargs
):
42 response
= super ( _NSASession
, self
). request (
43 method
, url
, params
, data
, ** kwargs
46 if 'api_requests' not in g
:
48 g
. api_requests
. append (( url
, params
, response
. text
))
49 except RuntimeError : pass # not within flask (e.g. utils.py)
51 requests
. Session
= requests
. sessions
. Session
= _NSASession
53 def fetch_xml ( feed_type
, feed_id
):
54 # TODO: handle requests.exceptions.ConnectionError
55 r
= requests
. get ( "https://www.youtube.com/feeds/videos.xml" , {
63 def parse_xml ( xmldata
):
65 'atom' : "http://www.w3.org/2005/Atom" ,
66 'yt' : "http://www.youtube.com/xml/schemas/2015" ,
67 'media' : "http://search.yahoo.com/mrss/" ,
68 'at' : "http://purl.org/atompub/tombstones/1.0" ,
71 feed
= ElementTree
. fromstring ( xmldata
)
73 if feed
. find ( 'at:deleted-entry' , ns
):
74 ( _
, _
, vid
) = feed
. find ( 'at:deleted-entry' , ns
). get ( 'ref' ). rpartition ( ':' )
75 return None , None , [{ 'deleted' : True , 'video_id' : vid
}]
77 title
= feed
. find ( 'atom:title' , ns
). text
78 author
= feed
. find ( 'atom:author/atom:name' , ns
). text \
79 if feed
. find ( 'atom:author' , ns
) else None
81 for entry
in feed
. findall ( 'atom:entry' , ns
):
83 'video_id' : entry
. find ( 'yt:videoId' , ns
). text
,
84 'title' : entry
. find ( 'atom:title' , ns
). text
,
85 'published' : entry
. find ( 'atom:published' , ns
). text
,
86 'channel_id' : entry
. find ( 'yt:channelId' , ns
). text
,
87 'author' : entry
. find ( 'atom:author' , ns
). find ( 'atom:name' , ns
). text
,
88 # extra fields for pull_subs/webhook:
89 'updated' : entry
. find ( 'atom:updated' , ns
). text
,
92 return title
, author
, videos
94 def update_channel ( db
, xmldata
, from_webhook
= False ):
95 if not xmldata
: return False
97 # Note: websub does not return global author, hence taking from first video
98 _
, _
, videos
= parse_xml ( xmldata
)
101 from flask
import current_app
# XXX: remove
102 for i
, video
in enumerate ( videos
):
103 if video
. get ( 'deleted' ):
104 if from_webhook
: current_app
. logger
. warning ( f
"ignoring deleted video {video['video_id']}" ) # XXX: remove
105 # TODO: enable once we enforce hmac validation:
106 #c.execute("DELETE FROM videos WHERE id = ?", (video['video_id'],))
109 now
= datetime
. now ( timezone
. utc
)
110 updated
= dateutil
. parser
. parse ( video
[ 'updated' ])
111 published
= dateutil
. parser
. parse ( video
[ 'published' ])
112 # if update and published time are near-identical, we assume it's new.
113 # checking if it was posted this week is necessary during xmlfeed pulling.
114 if ( updated
- published
). seconds
< 60 and ( now
- published
). days
< 7 :
116 if from_webhook
: current_app
. logger
. warning ( f
"fresh video {video['video_id']}" ) # XXX: remove
117 else : #, it might just an update to an older video, or a previously unlisted one.
118 # first, assume it's an older video (correct when pulling xmlfeeds)
119 timestamp
= published
120 # then, check if we don't know about it and if so, look up the real date.
122 # The 'published' timestamp sent in websub POSTs are often wrong (e.g.:
123 # video gets uploaded as unlisted on day A and set to public on day B;
124 # the webhook is sent on day B, but 'published' says A. The video
125 # therefore looks like it's just an update to an older video). If
126 # that's the case, we fetch get_video_info and double-check.
127 # We only need to do this to not-yet-in-the-database videos.
128 c
. execute ( "SELECT 1 from videos where id = ?" , ( video
[ 'video_id' ],))
129 new_video
= len ( c
. fetchall ()) < 1
130 if from_webhook
: current_app
. logger
. warning ( f
"video {video['video_id']}" ) # XXX: remove
131 if from_webhook
and new_video
:
132 if from_webhook
: current_app
. logger
. warning ( f
" is webhook and new" ) # XXX: remove
133 _
, meta
, _
, _
= get_video_info ( video
[ 'video_id' ])
135 meta
= prepare_metadata ( meta
)
136 published
= dateutil
. parser
. parse ( meta
[ 'published' ])
137 if from_webhook
: current_app
. logger
. warning ( f
" uploaded {published} " ) # XXX: remove
138 if ( now
- published
). days
< 7 :
140 else : #, it's just an update to an older video.
141 timestamp
= published
144 INSERT OR IGNORE INTO videos (id, channel_id, title, published, crawled)
145 VALUES (?, ?, ?, datetime(?), datetime(?))
154 if i
== 0 : # only required once per feed
156 INSERT OR REPLACE INTO channels (id, name)
158 """ , ( video
[ 'channel_id' ], video
[ 'author' ]))
163 def get_video_info ( video_id
, sts
= 0 , algo
= "" ):
165 returns: best-quality muxed video stream, player_response, error-type/mesage
166 error types: player, malformed, livestream, geolocked, exhausted
168 player_error
= None # for 'exhausted'
169 for el
in [ 'embedded' , 'detailpage' ]: #sometimes, only one or the other works
170 r
= requests
. get ( "https://www.youtube.com/get_video_info" , {
171 "video_id" : video_id
,
172 "eurl" : f
"https://youtube.googleapis.com/v/ {video_id} " ,
177 params
= parse_qs ( r
. text
)
178 if 'errorcode' in params
: # status=fail
179 return None , None , 'malformed' , params
[ 'reason' ][ 0 ]
181 metadata
= json
. loads ( params
. get ( 'player_response' )[ 0 ])
182 playabilityStatus
= metadata
[ 'playabilityStatus' ][ 'status' ]
183 if playabilityStatus
!= "OK" :
184 playabilityReason
= metadata
[ 'playabilityStatus' ]. get ( 'reason' ,
185 '//' . join ( metadata
[ 'playabilityStatus' ]. get ( 'messages' ,[])))
186 player_error
= f
" {playabilityStatus} : {playabilityReason} "
187 if playabilityStatus
== "UNPLAYABLE" :
188 continue # try again with next el value (or fail as exhausted)
189 # without videoDetails, there's only the error message
190 maybe_metadata
= metadata
if 'videoDetails' in metadata
else None
191 return None , maybe_metadata
, 'player' , player_error
192 if metadata
[ 'videoDetails' ][ 'isLiveContent' ] and \
193 ( metadata
[ 'videoDetails' ]. get ( 'isLive' , False ) or \
194 metadata
[ 'videoDetails' ]. get ( 'isPostLiveDvr' , False )):
195 return None , metadata
, 'livestream' , None
197 if not 'formats' in metadata
[ 'streamingData' ]:
200 formats
= metadata
[ 'streamingData' ][ 'formats' ]
201 for ( i
, v
) in enumerate ( formats
):
202 if not ( 'cipher' in v
or 'signatureCipher' in v
): continue
203 cipher
= parse_qs ( v
. get ( 'cipher' ) or v
. get ( 'signatureCipher' ))
204 formats
[ i
][ 'url' ] = unscramble ( cipher
, algo
)
206 # todo: check if we have urls or try again
209 sorted ( formats
, key
= lambda k
: k
[ 'height' ], reverse
= True )
212 if 'gcr' in parse_qs ( muxed
[ 0 ]):
213 return None , metadata
, 'geolocked' , None
215 return muxed
, metadata
, None , None
217 return None , metadata
, 'exhausted' , player_error
219 def unscramble ( cipher
, algo
): # test video id: UxxajLWwzqY
220 signature
= list ( cipher
[ 's' ][ 0 ])
221 for c
in algo
. split ():
222 op
, ix
= re
. match ( r
"([rsw])(\d+)?" , c
). groups ()
223 ix
= int ( ix
) % len ( signature
) if ix
else 0
225 if op
== 'r' : signature
= list ( reversed ( signature
))
226 if op
== 's' : signature
= signature
[ ix
:]
227 if op
== 'w' : signature
[ 0 ], signature
[ ix
] = signature
[ ix
], signature
[ 0 ]
228 sp
= cipher
. get ( 'sp' , [ 'signature' ])[ 0 ]
229 sig
= cipher
. get ( 'sig' , [ '' . join ( signature
)])[ 0 ]
230 return f
"{cipher['url'][0]}& {sp} = {sig} "
232 def prepare_metadata ( metadata
):
233 meta1
= metadata
[ 'videoDetails' ]
234 meta2
= metadata
[ 'microformat' ][ 'playerMicroformatRenderer' ]
235 cards
= metadata
[ 'cards' ][ 'cardCollectionRenderer' ][ 'cards' ] \
236 if 'cards' in metadata
else []
237 endsc
= metadata
[ 'endscreen' ][ 'endscreenRenderer' ][ 'elements' ] \
238 if 'endscreen' in metadata
else []
240 # the actual video streams have exact information:
242 sd
= metadata
[ 'streamingData' ]
243 some_stream
= ( sd
. get ( 'adaptiveFormats' ,[]) + sd
. get ( 'formats' ,[]))[ 0 ]
244 aspect_ratio
= some_stream
[ 'width' ] / some_stream
[ 'height' ]
245 # if that's unavailable (e.g. on livestreams), fall back to
246 # thumbnails (only either 4:3 or 16:9).
248 some_img
= meta2
[ 'thumbnail' ][ 'thumbnails' ][ 0 ]
249 aspect_ratio
= some_img
[ 'width' ] / some_img
[ 'height' ]
251 # Note: we could get subtitles in multiple formats directly by querying
252 # https://video.google.com/timedtext?hl=en&type=list&v=<VIDEO_ID> followed by
253 # https://www.youtube.com/api/timedtext?lang=<LANG_CODE>&v=<VIDEO_ID>&fmt={srv1|srv2|srv3|ttml|vtt},
254 # but that won't give us autogenerated subtitles (and is an extra request).
255 # we can still add &fmt= to the extracted URLs below (first one takes precedence).
257 { 'url' : cc
[ 'baseUrl' ],
258 'code' : cc
[ 'languageCode' ],
259 'autogenerated' : cc
. get ( 'kind' )== "asr" ,
260 'name' : cc
[ 'name' ][ 'simpleText' ],
261 'query' : "fmt=vtt&" + urlparse ( cc
[ 'baseUrl' ]). query
} # for our internal proxy
262 for cc
in metadata
. get ( 'captions' ,{})
263 . get ( 'playerCaptionsTracklistRenderer' ,{})
264 . get ( 'captionTracks' ,[])
265 ], key
= lambda cc
: cc
[ 'autogenerated' ])
268 # externals URLs are redirected through youtube.com/redirect, but we
269 # may encounter internal URLs, too
270 return parse_qs ( urlparse ( url
). query
). get ( 'q' ,[ url
])[ 0 ]
# Remove left-/rightmost word from string:
def delL(s):
    """Drop the first space-separated word of s (empty string if no space)."""
    return s.partition(' ')[2]
def delR(s):
    """Drop the last space-separated word of s (empty string if no space)."""
    return s.rpartition(' ')[0]
# Thousands separator aware int():
def intT(s):
    """int() that tolerates ','-grouped digits, e.g. '1,234' -> 1234."""
    return int(s.replace(',', ''))
277 def parse_infocard ( card
):
278 card
= card
[ 'cardRenderer' ]
279 ctype
= list ( card
[ 'content' ]. keys ())[ 0 ]
280 content
= card
[ 'content' ][ ctype
]
281 if ctype
== "pollRenderer" :
284 'question' : content
[ 'question' ][ 'simpleText' ],
285 'answers' : [( a
[ 'text' ][ 'simpleText' ], a
[ 'numVotes' ]) \
286 for a
in content
[ 'choices' ]],
288 elif ctype
== "videoInfoCardContentRenderer" :
290 # if the card references a live stream, it has no length, but a "LIVE NOW" badge.
291 # TODO: this is ugly; cleanup.
292 is_live
= content
. get ( 'badge' ,{}). get ( 'liveBadgeRenderer' ,{})
293 length
= is_live
. get ( 'label' ,{}). get ( 'simpleText' ) or content
[ 'lengthString' ][ 'simpleText' ] # '23:03'
295 'video_id' : content
[ 'action' ][ 'watchEndpoint' ][ 'videoId' ],
296 'title' : content
[ 'videoTitle' ][ 'simpleText' ],
297 'author' : delL ( content
[ 'channelName' ][ 'simpleText' ]),
299 'views' : intT ( delR ( content
[ 'viewCountText' ][ 'simpleText' ])),
301 elif ctype
== "playlistInfoCardContentRenderer" :
304 'playlist_id' : content
[ 'action' ][ 'watchEndpoint' ][ 'playlistId' ],
305 'video_id' : content
[ 'action' ][ 'watchEndpoint' ][ 'videoId' ],
306 'title' : content
[ 'playlistTitle' ][ 'simpleText' ],
307 'author' : delL ( content
[ 'channelName' ][ 'simpleText' ]),
308 'n_videos' : intT ( content
[ 'playlistVideoCount' ][ 'simpleText' ]),
310 elif ctype
== "simpleCardContentRenderer" and 'urlEndpoint' in content
[ 'command' ]:
313 'url' : clean_url ( content
[ 'command' ][ 'urlEndpoint' ][ 'url' ]),
314 'domain' : content
[ 'displayDomain' ][ 'simpleText' ],
315 'title' : content
[ 'title' ][ 'simpleText' ],
316 # XXX: no thumbnails for infocards
318 elif ctype
== "collaboratorInfoCardContentRenderer" :
321 'channel_id' : content
[ 'endpoint' ][ 'browseEndpoint' ][ 'browseId' ],
322 'title' : content
[ 'channelName' ][ 'simpleText' ],
323 'icons' : mkthumbs ( content
[ 'channelAvatar' ][ 'thumbnails' ]),
324 'subscribers' : content
. get ( 'subscriberCountText' ,{}). get ( 'simpleText' , '' ), # "545K subscribers"
328 content
= { 'error' : f
" {ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>" }
330 return { 'type' : ctype
, 'content' : content
}
def mkthumbs(thumbs):
    """Index a list of thumbnail dicts by pixel height: {height: url}.

    If two entries share a height, the later one wins.
    """
    by_height = {}
    for thumb in thumbs:
        by_height[thumb['height']] = thumb['url']
    return by_height
334 def parse_endcard ( card
):
335 card
= card
. get ( 'endscreenElementRenderer' , card
) #only sometimes nested
336 ctype
= card
[ 'style' ]
337 if ctype
== "CHANNEL" :
339 'channel_id' : card
[ 'endpoint' ][ 'browseEndpoint' ][ 'browseId' ],
340 'title' : card
[ 'title' ][ 'simpleText' ],
341 'icons' : mkthumbs ( card
[ 'image' ][ 'thumbnails' ]),
343 elif ctype
== "VIDEO" :
345 'video_id' : card
[ 'endpoint' ][ 'watchEndpoint' ][ 'videoId' ], # XXX: KeyError 'endpoint' exception (no idea which youtube video this was on)
346 'title' : card
[ 'title' ][ 'simpleText' ],
347 'length' : card
[ 'videoDuration' ][ 'simpleText' ], # '12:21'
348 'views' : delR ( card
[ 'metadata' ][ 'simpleText' ]),
349 # XXX: no channel name
351 elif ctype
== "PLAYLIST" :
353 'playlist_id' : card
[ 'endpoint' ][ 'watchEndpoint' ][ 'playlistId' ],
354 'video_id' : card
[ 'endpoint' ][ 'watchEndpoint' ][ 'videoId' ],
355 'title' : card
[ 'title' ][ 'simpleText' ],
356 'author' : delL ( card
[ 'metadata' ][ 'simpleText' ]),
357 'n_videos' : intT ( delR ( card
[ 'playlistLength' ][ 'simpleText' ])),
359 elif ctype
== "WEBSITE" or ctype
== "CREATOR_MERCHANDISE" :
361 url
= clean_url ( card
[ 'endpoint' ][ 'urlEndpoint' ][ 'url' ])
364 'domain' : urlparse ( url
). netloc
,
365 'title' : card
[ 'title' ][ 'simpleText' ],
366 'icons' : mkthumbs ( card
[ 'image' ][ 'thumbnails' ]),
370 content
= { 'error' : f
" {ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>" }
372 return { 'type' : ctype
, 'content' : content
}
374 infocards
= [ parse_infocard ( card
) for card
in cards
]
375 endcards
= [ parse_endcard ( card
) for card
in endsc
]
376 # combine cards to weed out duplicates. for videos and playlists prefer
377 # infocards, for channels and websites prefer endcards, as those have more
378 # information than the other.
379 # if the card type is not in ident, we use the whole card for comparison
380 # (otherwise they'd all replace each other)
381 ident
= { # ctype -> ident
383 'PLAYLIST' : 'playlist_id' ,
384 'CHANNEL' : 'channel_id' ,
388 getident
= lambda c
: c
[ 'content' ]. get ( ident
. get ( c
[ 'type' ]), c
)
389 mkexclude
= lambda cards
, types
: [ getident ( c
) for c
in cards
if c
[ 'type' ] in types
]
390 exclude
= lambda cards
, without
: [ c
for c
in cards
if getident ( c
) not in without
]
392 allcards
= exclude ( infocards
, mkexclude ( endcards
, [ 'CHANNEL' , 'WEBSITE' ])) + \
393 exclude ( endcards
, mkexclude ( infocards
, [ 'VIDEO' , 'PLAYLIST' ]))
395 all_countries
= """AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ BA BB BD
396 BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BV BW BY BZ CA CC CD CF CG CH
397 CI CK CL CM CN CO CR CU CV CW CX CY CZ DE DJ DK DM DO DZ EC EE EG EH ER
398 ES ET FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GS GT
399 GU GW GY HK HM HN HR HT HU ID IE IL IM IN IO IQ IR IS IT JE JM JO JP KE
400 KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD
401 ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC NE NF
402 NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PN PR PS PT PW PY QA
403 RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR SS ST SV SX
404 SY SZ TC TD TF TG TH TJ TK TL TM TN TO TR TT TV TW TZ UA UG UM US UY UZ
405 VA VC VE VG VI VN VU WF WS YE YT ZA ZM ZW""" . split ()
406 whitelisted
= sorted ( meta2
. get ( 'availableCountries' ,[]))
407 blacklisted
= sorted ( set ( all_countries
) - set ( whitelisted
))
409 published_at
= f
"{meta2['publishDate']}T00:00:00Z" # yyyy-mm-dd
410 # 'premiere' videos (and livestreams?) have a ISO8601 date available:
411 if 'liveBroadcastDetails' in meta2
and 'startTimestamp' in meta2
[ 'liveBroadcastDetails' ]: # TODO: tighten up
412 published_at
= meta2
[ 'liveBroadcastDetails' ][ 'startTimestamp' ]
415 'title' : meta1
[ 'title' ],
416 'author' : meta1
[ 'author' ],
417 'channel_id' : meta1
[ 'channelId' ],
418 'description' : meta1
[ 'shortDescription' ],
419 'published' : published_at
,
420 'views' : meta1
[ 'viewCount' ],
421 'length' : int ( meta1
[ 'lengthSeconds' ]),
422 'rating' : meta1
[ 'averageRating' ],
423 'category' : meta2
[ 'category' ],
424 'aspectr' : aspect_ratio
,
425 'unlisted' : meta2
[ 'isUnlisted' ],
426 'whitelisted' : whitelisted
,
427 'blacklisted' : blacklisted
,
428 'poster' : meta2
[ 'thumbnail' ][ 'thumbnails' ][ 0 ][ 'url' ],
429 'infocards' : infocards
,
430 'endcards' : endcards
,
431 'all_cards' : allcards
,
432 'subtitles' : subtitles
,
435 def store_video_metadata ( video_id
):
436 # check if we know about it, and if not, fetch and store video metadata
437 with sqlite3
. connect ( cf
[ 'global' ][ 'database' ]) as conn
:
439 c
. execute ( "SELECT 1 from videos where id = ?" , ( video_id
,))
440 new_video
= len ( c
. fetchall ()) < 1
442 _
, meta
, _
, _
= get_video_info ( video_id
)
444 meta
= prepare_metadata ( meta
)
446 INSERT OR IGNORE INTO videos (id, channel_id, title, published, crawled)
447 VALUES (?, ?, ?, datetime(?), datetime(?))
456 INSERT OR REPLACE INTO channels (id, name)
458 """ , ( meta
[ 'channel_id' ], meta
[ 'author' ]))
class RedditException(Exception):
    """Signals an unusable reddit response (non-ok status or missing 'data')."""
461 def fetch_reddit ( subreddits
, sorted_by
= "hot" , time
= None , *, limit
= 36 ,
462 count
= None , before
= None , after
= None ):
464 fetches data from a subreddit (or a multireddit like gif+gifs) and
465 filters/sorts results.
466 sorted_by values: hot, new, rising, controversial, top
467 time values: hour, day, week, month, year, all (for top and controversial)
473 query
= { k
: v
for k
, v
in {
477 'limit' : limit
, # 1..100 (default 25)
478 't' : time
, # hour,week,month,year,all
480 multireddit
= '+' . join ( subreddits
)
481 r
= requests
. get ( f
"https://old.reddit.com/r/ {multireddit} / {sorted_by} .json" ,
482 query
, headers
={ 'User-Agent' : 'Mozilla/5.0' })
483 if not r
. ok
or not 'data' in r
. json ():
484 raise RedditException ( r
. text
)
488 def fetch_reddit_post ( post_id
):
489 # Note: /api/info.json?id=t3_h7mjes == /by_id/t3_h7mjes.json
490 r
= requests
. get ( f
"https://old.reddit.com/by_id/t3_ {post_id} .json" ,
491 headers
={ 'User-Agent' : 'Mozilla/5.0' })
492 if not r
. ok
or not 'data' in r
. json ():
493 raise RedditException ( r
. text
)
497 def parse_reddit_videos ( data
):
499 entries
= sorted ( data
[ 'data' ][ 'children' ],
500 key
= lambda e
: e
[ 'data' ][ 'score' ] > 1 ,
502 for entry
in entries
:
504 if e
[ 'domain' ] not in [ 'youtube.com' , 'youtu.be' , 'invidio.us' ]:
507 # Note: youtube.com/<video_id> is not valid (404s), but seen in the wild.
508 video_id
= re
. match ( r
'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&)?v=|youtu.be/|youtube.com/embed/|youtube.com/)([-_0-9A-Za-z]+)' , e
[ 'url' ]). group ( 1 )
510 continue # XXX: should we log that?
511 if not video_id
: continue
513 'video_id' : video_id
,
514 'title' : html
. unescape ( e
[ 'title' ]), # Note: we unescape and re-escape in the template
515 'url' : e
[ 'permalink' ],
516 'n_comments' : e
[ 'num_comments' ],
517 'n_karma' : e
[ 'score' ],
518 'subreddit' : e
[ 'subreddit' ],
class NoFallbackException(Exception):
    """Raised by fallback_route when no further matching route is left to try."""
525 def fallback_route (* args
, ** kwargs
): # TODO: worthy as a flask-extension?
527 finds the next route that matches the current url rule, and executes it.
528 args, kwargs: pass all arguments of the current route
530 from flask
import current_app
, request
, g
531 from werkzeug
. exceptions
import NotFound
533 # build a list of endpoints that match the current request's url rule:
536 for rule
in current_app
. url_map
. iter_rules ()
537 if rule
. rule
== request
. url_rule
. rule
539 current
= matching
. index ( request
. endpoint
)
541 # since we can't change request.endpoint, we always get the original
542 # endpoint back. so for repeated fall throughs, we use the g object to
543 # increment how often we want to fall through.
544 if not '_fallback_next' in g
:
546 g
._ fallback
_ next
+= 1
548 next_ep
= current
+ g
._ fallback
_ next
550 if next_ep
< len ( matching
):
551 return current_app
. view_functions
[ matching
[ next_ep
]](* args
, ** kwargs
)
553 raise NoFallbackException
def websub_url_hmac(key, feed_id, timestamp, nonce):
    """Generate the sha1 hmac over "feed_id:timestamp:nonce", as required
    by websub/pubsubhubbub callback-url verification."""
    message = f"{feed_id}:{timestamp}:{nonce}"
    mac = hmac.new(key.encode('ascii'), message.encode('ascii'), hashlib.sha1)
    return mac.hexdigest()
def websub_body_hmac(key, body):
    """Generate the sha1 hmac of a raw (bytes) request body with the given
    ascii key, for websub content verification."""
    mac = hmac.new(key.encode('ascii'), body, hashlib.sha1)
    return mac.hexdigest()
564 from pprint
import pprint
566 pprint ( args
, stream
= codecs
. getwriter ( "utf-8" )( sys
. stderr
. buffer ))