# app/common/common.py
import os
import re
import json
import html
import base64
import requests
import hmac, hashlib
import requests_cache
import dateutil.parser
from xml.etree import ElementTree
from configparser import ConfigParser
from datetime import datetime, timezone
from urllib.parse import parse_qs, urlparse

cf = ConfigParser()
config_filename = os.environ.get('YT_CONFIG', '/etc/yt/config.ini')
cf.read(config_filename)
if not 'global' in cf: # todo: full config check
    raise Exception("Configuration file not found or empty")

# Note: currently expiring after 10 minutes. googlevideo-urls are valid for
# 5h59m, but this makes reddit very stale and premiere videos won't start.
# TODO: expire when video is livestream/premiere/etc
requests_cache.install_cache(backend='memory', expire_after=10*60, allowable_codes=(200,))

# Note: this should only be required for the 'memory' backed cache.
# TODO: only run for long-running processes, i.e. the frontend
from threading import Timer
def purge_cache(sec):
    requests_cache.remove_expired_responses()
    t = Timer(sec, purge_cache, args=(sec,))
    t.setDaemon(True)
    t.start()
purge_cache(10*60)

# for debugging purposes, monkey patch the requests session to store each
# requests-request in a flask-request's g object (url and response). we can
# then use a flask error_handler to include the request data in the error log.
# since we also use this outside the flask appcontext (e.g. from utils.py), the
# g access is wrapped in a try/except block.
from flask import g
import requests
from requests import Session as OriginalSession
class _NSASession(OriginalSession):
    def request(self, method, url, params=None, data=None, **kwargs):
        response = super(_NSASession, self).request(
            method, url, params, data, **kwargs
        )
        try:
            if 'api_requests' not in g:
                g.api_requests = []
            g.api_requests.append((url, params, response.text))
        except RuntimeError: pass # not within flask (e.g. utils.py)
        return response
requests.Session = requests.sessions.Session = _NSASession

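# A minimal sketch (not part of this module) of the error_handler idea mentioned
# above; 'app' is a hypothetical flask application object defined elsewhere:
#
#   @app.errorhandler(500)
#   def log_api_requests(e):
#       for url, params, text in g.get('api_requests', []):
#           app.logger.error("during %s %s: %s", url, params, text[:200])
#       return "internal error", 500
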
def fetch_xml(feed_type, feed_id):
    # TODO: handle requests.exceptions.ConnectionError
    r = requests.get("https://www.youtube.com/feeds/videos.xml", {
        feed_type: feed_id,
    })
    if not r.ok:
        return None

    return r.content

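# Illustrative calls (the ids are placeholders): fetch_xml("channel_id", "UC...")
# or fetch_xml("playlist_id", "PL..."); feed_type/feed_id are passed straight
# through as the query parameter of youtube's videos.xml feed endpoint.
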
def parse_xml(xmldata):
    ns = {
        'atom':"http://www.w3.org/2005/Atom",
        'yt': "http://www.youtube.com/xml/schemas/2015",
        'media':"http://search.yahoo.com/mrss/",
        'at': "http://purl.org/atompub/tombstones/1.0",
    }

    feed = ElementTree.fromstring(xmldata)

    if feed.find('at:deleted-entry',ns):
        (_,_,vid) = feed.find('at:deleted-entry',ns).get('ref').rpartition(':')
        return None, None, [{'deleted': True, 'video_id': vid}]

    title = feed.find('atom:title',ns).text
    author = feed.find('atom:author/atom:name',ns).text \
        if feed.find('atom:author',ns) else None
    videos = []
    for entry in feed.findall('atom:entry',ns):
        videos.append({
            'video_id': entry.find('yt:videoId',ns).text,
            'title': entry.find('atom:title',ns).text,
            'published': entry.find('atom:published',ns).text,
            'channel_id': entry.find('yt:channelId',ns).text,
            'author': entry.find('atom:author',ns).find('atom:name',ns).text,
            # extra fields for pull_subs/webhook:
            'updated': entry.find('atom:updated',ns).text,
        })

    return title, author, videos

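# parse_xml() returns (title, author, videos): videos is a list of dicts with
# the keys built above, or a single {'deleted': True, 'video_id': ...} entry
# when the feed is an at:deleted-entry tombstone.
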
def update_channel(db, xmldata, from_webhook=False):
    if not xmldata: return False

    # Note: websub does not return global author, hence taking from first video
    _, _, videos = parse_xml(xmldata)

    c = db.cursor()
    from flask import current_app # XXX: remove
    for i, video in enumerate(videos):
        if video.get('deleted'):
            if from_webhook: current_app.logger.warning(f"ignoring deleted video {video['video_id']}") # XXX: remove
            # TODO: enable once we enforce hmac validation:
            #c.execute("DELETE FROM videos WHERE id = ?", (video['video_id'],))
            break

        now = datetime.now(timezone.utc)
        updated = dateutil.parser.parse(video['updated'])
        published = dateutil.parser.parse(video['published'])
        # if updated and published time are near-identical, we assume it's new.
        # checking if it was posted this week is necessary during xmlfeed pulling.
        if (updated - published).seconds < 60 and (now - published).days < 7:
            timestamp = now
            if from_webhook: current_app.logger.warning(f"fresh video {video['video_id']}") # XXX: remove
        else: # it might just be an update to an older video, or a previously unlisted one.
            # first, assume it's an older video (correct when pulling xmlfeeds)
            timestamp = published
            # then, check if we don't know about it and if so, look up the real date.

            # The 'published' timestamp sent in websub POSTs is often wrong (e.g.:
            # video gets uploaded as unlisted on day A and set to public on day B;
            # the webhook is sent on day B, but 'published' says A. The video
            # therefore looks like it's just an update to an older video). If
            # that's the case, we call get_video_info and double-check.
            # We only need to do this for videos that are not yet in the database.
            c.execute("SELECT 1 from videos where id = ?", (video['video_id'],))
            new_video = len(c.fetchall()) < 1
            if from_webhook: current_app.logger.warning(f"video {video['video_id']}") # XXX: remove
            if from_webhook and new_video:
                if from_webhook: current_app.logger.warning(f" is webhook and new") # XXX: remove
                _, meta, _, _ = get_video_info(video['video_id'])
                if meta:
                    meta = prepare_metadata(meta)
                    published = dateutil.parser.parse(meta['published'])
                    if from_webhook: current_app.logger.warning(f" uploaded {published}") # XXX: remove
                    if (now - published).days < 7:
                        timestamp = now
                    else: # it's just an update to an older video.
                        timestamp = published

        c.execute("""
            INSERT OR IGNORE INTO videos (id, channel_id, title, published, crawled)
            VALUES (?, ?, ?, datetime(?), datetime(?))
        """, (
            video['video_id'],
            video['channel_id'],
            video['title'],
            video['published'],
            timestamp
        ))

        if i == 0: # only required once per feed
            c.execute("""
                INSERT OR REPLACE INTO channels (id, name)
                VALUES (?, ?)
            """, (video['channel_id'], video['author']))
    db.commit()

    return True

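# The statements above imply roughly the following sqlite schema (a sketch for
# orientation only; the real schema lives elsewhere in the repository and may
# have additional columns or constraints):
#   CREATE TABLE videos (id TEXT PRIMARY KEY, channel_id TEXT, title TEXT,
#                        published DATETIME, crawled DATETIME);
#   CREATE TABLE channels (id TEXT PRIMARY KEY, name TEXT);
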
def get_video_info(video_id, sts=0, algo=""):
    """
    returns: list of muxed video stream urls (best quality first),
             player_response, error type, error message
    error types: player, malformed, livestream, geolocked, exhausted
    """
    player_error = None # for 'exhausted'
    for el in ['embedded', 'detailpage']: # sometimes, only one or the other works
        r = requests.get("https://www.youtube.com/get_video_info", {
            "video_id": video_id,
            "eurl": f"https://youtube.googleapis.com/v/{video_id}",
            "el": el,
            "sts": sts,
            "hl": "en_US",
        })
        params = parse_qs(r.text)
        if 'errorcode' in params: # status=fail
            return None, None, 'malformed', params['reason'][0]

        metadata = json.loads(params.get('player_response')[0])
        playabilityStatus = metadata['playabilityStatus']['status']
        if playabilityStatus != "OK":
            playabilityReason = metadata['playabilityStatus'].get('reason',
                '//'.join(metadata['playabilityStatus'].get('messages',[])))
            player_error = f"{playabilityStatus}: {playabilityReason}"
            if playabilityStatus == "UNPLAYABLE":
                continue # try again with next el value (or fail as exhausted)
            # without videoDetails, there's only the error message
            maybe_metadata = metadata if 'videoDetails' in metadata else None
            return None, maybe_metadata, 'player', player_error
        if metadata['videoDetails']['isLiveContent'] and \
                (metadata['videoDetails'].get('isLive', False) or \
                 metadata['videoDetails'].get('isPostLiveDvr', False)):
            return None, metadata, 'livestream', None

        if not 'formats' in metadata['streamingData']:
            continue # no urls

        formats = metadata['streamingData']['formats']
        for (i,v) in enumerate(formats):
            if not ('cipher' in v or 'signatureCipher' in v): continue
            cipher = parse_qs(v.get('cipher') or v.get('signatureCipher'))
            formats[i]['url'] = unscramble(cipher, algo)

        # todo: check if we have urls or try again
        muxed = [
            f['url'] for f in
            sorted(formats, key=lambda k: k['height'], reverse=True)
        ]

        if 'gcr' in parse_qs(muxed[0]):
            return None, metadata, 'geolocked', None

        return muxed, metadata, None, None
    else:
        return None, metadata, 'exhausted', player_error

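# Call sites unpack the full 4-tuple, e.g. (as done in update_channel above):
#   streams, meta, error, errdetail = get_video_info(video_id)
# where 'streams' is None whenever an error type is set, and 'meta' is the
# (partial) player_response when one could be extracted.
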
def unscramble(cipher, algo): # test video id: UxxajLWwzqY
    signature = list(cipher['s'][0])
    for c in algo.split():
        op, ix = re.match(r"([rsw])(\d+)?", c).groups()
        ix = int(ix) % len(signature) if ix else 0
        if not op: continue
        if op == 'r': signature = list(reversed(signature))
        if op == 's': signature = signature[ix:]
        if op == 'w': signature[0], signature[ix] = signature[ix], signature[0]
    sp = cipher.get('sp', ['signature'])[0]
    sig = cipher.get('sig', [''.join(signature)])[0]
    return f"{cipher['url'][0]}&{sp}={sig}"

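# Illustrative example (the algo string is obtained elsewhere, e.g. from the
# player javascript): unscramble(cipher, "r s2 w3") reverses the signature,
# drops its first two characters, then swaps characters 0 and 3 before
# appending the result to the url.
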
def prepare_metadata(metadata):
    meta1 = metadata['videoDetails']
    meta2 = metadata['microformat']['playerMicroformatRenderer']
    cards = metadata['cards']['cardCollectionRenderer']['cards'] \
        if 'cards' in metadata else []
    endsc = metadata['endscreen']['endscreenRenderer']['elements'] \
        if 'endscreen' in metadata else []

    # the actual video streams have exact information:
    try:
        sd = metadata['streamingData']
        some_stream = (sd.get('adaptiveFormats',[]) + sd.get('formats',[]))[0]
        aspect_ratio = some_stream['width'] / some_stream['height']
    # if that's unavailable (e.g. on livestreams), fall back to
    # thumbnails (only either 4:3 or 16:9).
    except:
        some_img = meta2['thumbnail']['thumbnails'][0]
        aspect_ratio = some_img['width'] / some_img['height']

    # Note: we could get subtitles in multiple formats directly by querying
    # https://video.google.com/timedtext?hl=en&type=list&v=<VIDEO_ID> followed by
    # https://www.youtube.com/api/timedtext?lang=<LANG_CODE>&v=<VIDEO_ID>&fmt={srv1|srv2|srv3|ttml|vtt},
    # but that won't give us autogenerated subtitles (and is an extra request).
    # we can still add &fmt= to the extracted URLs below (first one takes precedence).
    subtitles = sorted([
        {'url':cc['baseUrl'],
         'code':cc['languageCode'],
         'autogenerated':cc.get('kind')=="asr",
         'name':cc['name']['simpleText'],
         'query':"fmt=vtt&"+urlparse(cc['baseUrl']).query} # for our internal proxy
        for cc in metadata.get('captions',{})
            .get('playerCaptionsTracklistRenderer',{})
            .get('captionTracks',[])
    ], key=lambda cc: cc['autogenerated'])

    def clean_url(url):
        # external URLs are redirected through youtube.com/redirect, but we
        # may encounter internal URLs, too
        return parse_qs(urlparse(url).query).get('q',[url])[0]
    # Remove left-/rightmost word from string:
    delL = lambda s: s.partition(' ')[2]
    delR = lambda s: s.rpartition(' ')[0]
    # Thousands separator aware int():
    intT = lambda s: int(s.replace(',', ''))

    def parse_infocard(card):
        card = card['cardRenderer']
        ctype = list(card['content'].keys())[0]
        content = card['content'][ctype]
        if ctype == "pollRenderer":
            ctype = "POLL"
            content = {
                'question': content['question']['simpleText'],
                'answers': [(a['text']['simpleText'],a['numVotes']) \
                    for a in content['choices']],
            }
        elif ctype == "videoInfoCardContentRenderer":
            ctype = "VIDEO"
            # if the card references a live stream, it has no length, but a "LIVE NOW" badge.
            # TODO: this is ugly; cleanup.
            is_live = content.get('badge',{}).get('liveBadgeRenderer',{})
            length = is_live.get('label',{}).get('simpleText') or content['lengthString']['simpleText'] # '23:03'
            content = {
                'video_id': content['action']['watchEndpoint']['videoId'],
                'title': content['videoTitle']['simpleText'],
                'author': delL(content['channelName']['simpleText']),
                'length': length,
                'views': intT(delR(content['viewCountText']['simpleText'])),
            }
        elif ctype == "playlistInfoCardContentRenderer":
            ctype = "PLAYLIST"
            content = {
                'playlist_id': content['action']['watchEndpoint']['playlistId'],
                'video_id': content['action']['watchEndpoint']['videoId'],
                'title': content['playlistTitle']['simpleText'],
                'author': delL(content['channelName']['simpleText']),
                'n_videos': intT(content['playlistVideoCount']['simpleText']),
            }
        elif ctype == "simpleCardContentRenderer" and 'urlEndpoint' in content['command']:
            ctype = "WEBSITE"
            content = {
                'url': clean_url(content['command']['urlEndpoint']['url']),
                'domain': content['displayDomain']['simpleText'],
                'title': content['title']['simpleText'],
                # XXX: no thumbnails for infocards
            }
        elif ctype == "collaboratorInfoCardContentRenderer":
            ctype = "CHANNEL"
            content = {
                'channel_id': content['endpoint']['browseEndpoint']['browseId'],
                'title': content['channelName']['simpleText'],
                'icons': mkthumbs(content['channelAvatar']['thumbnails']),
                'subscribers': content.get('subscriberCountText',{}).get('simpleText',''), # "545K subscribers"
            }
        else:
            import pprint
            content = {'error': f"{ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>"}

        return {'type': ctype, 'content': content}

    def mkthumbs(thumbs):
        return {e['height']: e['url'] for e in thumbs}
    def parse_endcard(card):
        card = card.get('endscreenElementRenderer', card) # only sometimes nested
        ctype = card['style']
        if ctype == "CHANNEL":
            content = {
                'channel_id': card['endpoint']['browseEndpoint']['browseId'],
                'title': card['title']['simpleText'],
                'icons': mkthumbs(card['image']['thumbnails']),
            }
        elif ctype == "VIDEO":
            content = {
                'video_id': card['endpoint']['watchEndpoint']['videoId'], # XXX: KeyError 'endpoint' exception (no idea which youtube video this was on)
                'title': card['title']['simpleText'],
                'length': card['videoDuration']['simpleText'], # '12:21'
                'views': delR(card['metadata']['simpleText']),
                # XXX: no channel name
            }
        elif ctype == "PLAYLIST":
            content = {
                'playlist_id': card['endpoint']['watchEndpoint']['playlistId'],
                'video_id': card['endpoint']['watchEndpoint']['videoId'],
                'title': card['title']['simpleText'],
                'author': delL(card['metadata']['simpleText']),
                'n_videos': intT(delR(card['playlistLength']['simpleText'])),
            }
        elif ctype == "WEBSITE" or ctype == "CREATOR_MERCHANDISE":
            ctype = "WEBSITE"
            url = clean_url(card['endpoint']['urlEndpoint']['url'])
            content = {
                'url': url,
                'domain': urlparse(url).netloc,
                'title': card['title']['simpleText'],
                'icons': mkthumbs(card['image']['thumbnails']),
            }
        else:
            import pprint
            content = {'error': f"{ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>"}

        return {'type': ctype, 'content': content}

    infocards = [parse_infocard(card) for card in cards]
    endcards = [parse_endcard(card) for card in endsc]
    # combine cards to weed out duplicates. for videos and playlists prefer
    # infocards, for channels and websites prefer endcards, as those have more
    # information than the others.
    # if the card type is not in ident, we use the whole card for comparison
    # (otherwise they'd all replace each other)
    ident = { # ctype -> ident
        'VIDEO': 'video_id',
        'PLAYLIST': 'playlist_id',
        'CHANNEL': 'channel_id',
        'WEBSITE': 'url',
        'POLL': 'question',
    }
    getident = lambda c: c['content'].get(ident.get(c['type']), c)
    mkexclude = lambda cards, types: [getident(c) for c in cards if c['type'] in types]
    exclude = lambda cards, without: [c for c in cards if getident(c) not in without]

    allcards = exclude(infocards, mkexclude(endcards, ['CHANNEL','WEBSITE'])) + \
               exclude(endcards, mkexclude(infocards, ['VIDEO','PLAYLIST']))

    all_countries = """AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ BA BB BD
        BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BV BW BY BZ CA CC CD CF CG CH
        CI CK CL CM CN CO CR CU CV CW CX CY CZ DE DJ DK DM DO DZ EC EE EG EH ER
        ES ET FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GS GT
        GU GW GY HK HM HN HR HT HU ID IE IL IM IN IO IQ IR IS IT JE JM JO JP KE
        KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD
        ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC NE NF
        NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PN PR PS PT PW PY QA
        RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR SS ST SV SX
        SY SZ TC TD TF TG TH TJ TK TL TM TN TO TR TT TV TW TZ UA UG UM US UY UZ
        VA VC VE VG VI VN VU WF WS YE YT ZA ZM ZW""".split()
    whitelisted = sorted(meta2.get('availableCountries',[]))
    blacklisted = sorted(set(all_countries) - set(whitelisted))

    published_at = f"{meta2['publishDate']}T00:00:00Z" # yyyy-mm-dd
    # 'premiere' videos (and livestreams?) have an ISO8601 date available:
    if 'liveBroadcastDetails' in meta2 and 'startTimestamp' in meta2['liveBroadcastDetails']: # TODO: tighten up
        published_at = meta2['liveBroadcastDetails']['startTimestamp']

    return {
        'title': meta1['title'],
        'author': meta1['author'],
        'channel_id': meta1['channelId'],
        'description': meta1['shortDescription'],
        'published': published_at,
        'views': meta1['viewCount'],
        'length': int(meta1['lengthSeconds']),
        'rating': meta1['averageRating'],
        'category': meta2['category'],
        'aspectr': aspect_ratio,
        'unlisted': meta2['isUnlisted'],
        'whitelisted': whitelisted,
        'blacklisted': blacklisted,
        'poster': meta2['thumbnail']['thumbnails'][0]['url'],
        'infocards': infocards,
        'endcards': endcards,
        'all_cards': allcards,
        'subtitles': subtitles,
    }

class RedditException(Exception): pass
def fetch_reddit(subreddits, sorted_by="hot", time=None, *, limit=36,
                 count=None, before=None, after=None):
    """
    fetches data from a subreddit (or a multireddit like gif+gifs) and
    filters/sorts results.
    sorted_by values: hot, new, rising, controversial, top
    time values: hour, day, week, month, year, all (for top and controversial)
    """

    if not subreddits:
        return None

    query = {k:v for k,v in {
        'count':count,
        'before':before,
        'after':after,
        'limit':limit, # 1..100 (default 25)
        't': time, # hour,week,month,year,all
    }.items() if v}
    multireddit = '+'.join(subreddits)
    r = requests.get(f"https://old.reddit.com/r/{multireddit}/{sorted_by}.json",
                     query, headers={'User-Agent':'Mozilla/5.0'})
    if not r.ok or not 'data' in r.json():
        raise RedditException(r.text)

    return r.json()

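# Illustrative call (the subreddit names are placeholders):
#   data = fetch_reddit(['videos', 'youtubehaiku'], sorted_by='top', time='week')
#   videos = parse_reddit_videos(data)
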
def fetch_reddit_post(post_id):
    # Note: /api/info.json?id=t3_h7mjes == /by_id/t3_h7mjes.json
    r = requests.get(f"https://old.reddit.com/by_id/t3_{post_id}.json",
                     headers={'User-Agent':'Mozilla/5.0'})
    if not r.ok or not 'data' in r.json():
        raise RedditException(r.text)

    return r.json()

def parse_reddit_videos(data):
    videos = []
    entries = sorted(data['data']['children'],
                     key=lambda e: e['data']['score'] > 1,
                     reverse=True)
    for entry in entries:
        e = entry['data']
        if e['domain'] not in ['youtube.com', 'youtu.be', 'invidio.us']:
            continue
        try:
            # Note: youtube.com/<video_id> is not valid (404s), but seen in the wild.
            video_id = re.match(r'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&amp;)?v=|youtu.be/|youtube.com/embed/|youtube.com/)([-_0-9A-Za-z]+)', e['url']).group(1)
        except:
            continue # XXX: should we log that?
        if not video_id: continue
        videos.append({
            'video_id': video_id,
            'title': html.unescape(e['title']), # Note: we unescape and re-escape in the template
            'url': e['permalink'],
            'n_comments': e['num_comments'],
            'n_karma': e['score'],
            'subreddit': e['subreddit'],
            'post_id': e['id'],
        })

    return videos

class NoFallbackException(Exception): pass
def fallback_route(*args, **kwargs): # TODO: worthy as a flask-extension?
    """
    finds the next route that matches the current url rule, and executes it.
    args, kwargs: pass all arguments of the current route
    """
    from flask import current_app, request, g
    from werkzeug.exceptions import NotFound

    # build a list of endpoints that match the current request's url rule:
    matching = [
        rule.endpoint
        for rule in current_app.url_map.iter_rules()
        if rule.rule == request.url_rule.rule
    ]
    current = matching.index(request.endpoint)

    # since we can't change request.endpoint, we always get the original
    # endpoint back. so for repeated fall throughs, we use the g object to
    # increment how often we want to fall through.
    if not '_fallback_next' in g:
        g._fallback_next = 0
    g._fallback_next += 1

    next_ep = current + g._fallback_next

    if next_ep < len(matching):
        return current_app.view_functions[matching[next_ep]](*args, **kwargs)
    else:
        raise NoFallbackException

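# Usage sketch (hypothetical routes, not part of this module): register two view
# functions for the same url rule; the first can hand off to the second:
#
#   @app.route('/watch')
#   def watch_local():
#       if not have_local_data():          # have_local_data() is made up here
#           return fallback_route()        # falls through to watch_upstream
#       ...
#
#   @app.route('/watch', endpoint='watch_upstream')
#   def watch_upstream():
#       ...
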
def websub_url_hmac(key, feed_id, timestamp, nonce):
    """ generate sha1 hmac, as required by websub/pubsubhubbub """
    sig_input = f"{feed_id}:{timestamp}:{nonce}".encode('ascii')
    return hmac.new(key.encode('ascii'), sig_input, hashlib.sha1).hexdigest()

def websub_body_hmac(key, body):
    return hmac.new(key.encode('ascii'), body, hashlib.sha1).hexdigest()

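# For reference: pubsubhubbub delivers the body signature in an
# "X-Hub-Signature: sha1=<hexdigest>" header, so a webhook handler could verify
# it roughly like this (sketch; 'secret' and flask's 'request' come from the caller):
#   expected = websub_body_hmac(secret, request.get_data())
#   ok = hmac.compare_digest(f"sha1={expected}",
#                            request.headers.get('X-Hub-Signature', ''))
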
def pp(*args):
    from pprint import pprint
    import sys, codecs
    pprint(args, stream=codecs.getwriter("utf-8")(sys.stderr.buffer))