import os
import re
import json
import html
import base64
import requests
import hmac, hashlib
import requests_cache
import dateutil.parser
from xml.etree import ElementTree
from configparser import ConfigParser
from datetime import datetime, timezone
from urllib.parse import parse_qs, urlparse

cf = ConfigParser()
config_filename = os.environ.get('YT_CONFIG', '/etc/yt/config.ini')
cf.read(config_filename)
if 'global' not in cf: # todo: full config check
    raise Exception("Configuration file not found or empty")

# Note: currently expiring after 10 minutes. googlevideo-urls are valid for 5h59m,
# but caching that long makes reddit very stale and premiere videos won't start.
# TODO: expire when video is livestream/premiere/etc
requests_cache.install_cache(backend='memory', expire_after=10*60, allowable_codes=(200,))
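
# One possible refinement for the TODO above (illustrative sketch, not wired up
# here): requests_cache ships a disabled() context manager, so lookups for
# livestreams/premieres could bypass the cache entirely, e.g.:
#
#   with requests_cache.disabled():
#       r = requests.get("https://www.youtube.com/get_video_info", params)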

# Note: this should only be required for the 'memory'-backed cache.
# TODO: only run for long-running processes, i.e. the frontend
from threading import Timer
def purge_cache(sec):
    requests_cache.remove_expired_responses()
    t = Timer(sec, purge_cache, args=(sec,))
    t.daemon = True  # setDaemon() is deprecated
    t.start()
purge_cache(10*60)

# For debugging purposes, monkey patch the requests session to store each
# requests-request in a flask-request's g object (url and response). We can
# then use a flask error_handler to include the request data in the error log.
# Since requests are also issued from outside the flask appcontext (e.g. from
# utils.py), the access to g is wrapped in a try/except block.
from flask import g
from requests import Session as OriginalSession
class _NSASession(OriginalSession):
    def request(self, method, url, params=None, data=None, **kwargs):
        response = super(_NSASession, self).request(
            method, url, params, data, **kwargs
        )
        try:
            if 'api_requests' not in g:
                g.api_requests = []
            g.api_requests.append((url, params, response.text))
        except RuntimeError: pass # not within flask (e.g. utils.py)
        return response
requests.Session = requests.sessions.Session = _NSASession
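
# Sketch of how a frontend error handler might consume g.api_requests (the
# `app` object and handler name are assumptions; only the g usage is defined
# in this module):
#
#   @app.errorhandler(Exception)
#   def log_with_api_requests(e):
#       for url, params, response_text in g.get('api_requests', []):
#           app.logger.error(f"upstream request: {url} {params}: {response_text[:200]}")
#       return "internal server error", 500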

def fetch_xml(feed_type, feed_id):
    # TODO: handle requests.exceptions.ConnectionError
    r = requests.get("https://www.youtube.com/feeds/videos.xml", {
        feed_type: feed_id,
    })
    if not r.ok:
        return None

    return r.content
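
# Illustrative call (the id is a placeholder; "channel_id" and "playlist_id"
# are parameter names the YouTube Atom feed endpoint accepts):
#
#   xmldata = fetch_xml("channel_id", "UCxxxxxxxxxxxxxxxxxxxxxx")
#   # -> raw Atom feed bytes, or None on HTTP errors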

def parse_xml(xmldata):
    ns = {
        'atom':"http://www.w3.org/2005/Atom",
        'yt': "http://www.youtube.com/xml/schemas/2015",
        'media':"http://search.yahoo.com/mrss/",
        'at': "http://purl.org/atompub/tombstones/1.0",
    }

    feed = ElementTree.fromstring(xmldata)

    # Note: Element truthiness is ambiguous (childless elements are falsy), so
    # compare find() results against None explicitly.
    if feed.find('at:deleted-entry',ns) is not None:
        (_,_,vid) = feed.find('at:deleted-entry',ns).get('ref').rpartition(':')
        return None, None, [{'deleted': True, 'video_id': vid}]

    title = feed.find('atom:title',ns).text
    author = feed.find('atom:author/atom:name',ns).text \
        if feed.find('atom:author',ns) is not None else None
    videos = []
    for entry in feed.findall('atom:entry',ns):
        videos.append({
            'video_id': entry.find('yt:videoId',ns).text,
            'title': entry.find('atom:title',ns).text,
            'published': entry.find('atom:published',ns).text,
            'channel_id': entry.find('yt:channelId',ns).text,
            'author': entry.find('atom:author',ns).find('atom:name',ns).text,
            # extra fields for pull_subs/webhook:
            'updated': entry.find('atom:updated',ns).text,
        })

    return title, author, videos
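
# For reference, the tuple returned by parse_xml above has this shape
# (values are illustrative):
#   ("Channel Title", "Channel Name" or None,
#    [{'video_id': '…', 'title': '…', 'published': '…', 'channel_id': 'UC…',
#      'author': '…', 'updated': '…'}, …])
# and (None, None, [{'deleted': True, 'video_id': '…'}]) for tombstone feeds.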

def update_channel(db, xmldata, from_webhook=False):
    if not xmldata: return False

    # Note: websub does not return the global author, hence we take it from the first video
    _, _, videos = parse_xml(xmldata)

    c = db.cursor()
    from flask import current_app # XXX: remove
    for i, video in enumerate(videos):
        if video.get('deleted'):
            if from_webhook: current_app.logger.warning(f"ignoring deleted video {video['video_id']}") # XXX: remove
            # TODO: enable once we enforce hmac validation:
            #c.execute("DELETE FROM videos WHERE id = ?", (video['video_id'],))
            break

        now = datetime.now(timezone.utc)
        updated = dateutil.parser.parse(video['updated'])
        published = dateutil.parser.parse(video['published'])
        # If the updated and published times are near-identical, we assume it's a new video.
        # Checking if it was posted this week is necessary during xmlfeed pulling.
        if (updated - published).seconds < 60 and (now - published).days < 7:
            timestamp = now
            if from_webhook: current_app.logger.warning(f"fresh video {video['video_id']}") # XXX: remove
        else: # otherwise, it might just be an update to an older video, or a previously unlisted one.
            # First, assume it's an older video (correct when pulling xmlfeeds).
            timestamp = published
            # Then, check if we don't know about it yet and if so, look up the real date.

            # The 'published' timestamp sent in websub POSTs is often wrong (e.g.:
            # the video gets uploaded as unlisted on day A and set to public on day B;
            # the webhook is sent on day B, but 'published' says A. The video
            # therefore looks like it's just an update to an older video). If
            # that's the case, we call get_video_info and double-check.
            # We only need to do this for videos not yet in the database.
            c.execute("SELECT 1 from videos where id = ?", (video['video_id'],))
            new_video = len(c.fetchall()) < 1
            if from_webhook: current_app.logger.warning(f"video {video['video_id']}") # XXX: remove
            if from_webhook and new_video:
                if from_webhook: current_app.logger.warning(f" is webhook and new") # XXX: remove
                _, meta, _, _ = get_video_info(video['video_id'])
                if meta:
                    meta = prepare_metadata(meta)
                    published = dateutil.parser.parse(meta['published'])
                    if from_webhook: current_app.logger.warning(f" uploaded {published}") # XXX: remove
                    if (now - published).days < 7:
                        timestamp = now
                    else: # it's just an update to an older video.
                        timestamp = published

        c.execute("""
            INSERT OR IGNORE INTO videos (id, channel_id, title, published, crawled)
            VALUES (?, ?, ?, datetime(?), datetime(?))
        """, (
            video['video_id'],
            video['channel_id'],
            video['title'],
            video['published'],
            timestamp
        ))

        if i == 0: # only required once per feed
            c.execute("""
                INSERT OR REPLACE INTO channels (id, name)
                VALUES (?, ?)
            """, (video['channel_id'], video['author']))
    db.commit()

    return True

def get_video_info(video_id, sts=0, algo=""):
    """
    returns: best-quality muxed video stream url, player_response, error type, error message
    error types: player, malformed, livestream, geolocked, exhausted
    """
    player_error = None # for 'exhausted'
    for el in ['embedded', 'detailpage']: # sometimes, only one or the other works
        r = requests.get("https://www.youtube.com/get_video_info", {
            "video_id": video_id,
            "eurl": f"https://youtube.googleapis.com/v/{video_id}",
            "el": el,
            "sts": sts,
            "hl": "en_US",
        })
        params = parse_qs(r.text)
        if 'errorcode' in params: # status=fail
            return None, None, 'malformed', params['reason'][0]

        metadata = json.loads(params.get('player_response')[0])
        playabilityStatus = metadata['playabilityStatus']['status']
        if playabilityStatus != "OK":
            playabilityReason = metadata['playabilityStatus'].get('reason',
                '//'.join(metadata['playabilityStatus'].get('messages',[])))
            player_error = f"{playabilityStatus}: {playabilityReason}"
            if playabilityStatus == "UNPLAYABLE":
                continue # try again with next el value (or fail as exhausted)
            # without videoDetails, there's only the error message
            maybe_metadata = metadata if 'videoDetails' in metadata else None
            return None, maybe_metadata, 'player', player_error
        if metadata['videoDetails']['isLiveContent'] and \
                (metadata['videoDetails'].get('isLive', False) or \
                 metadata['videoDetails'].get('isPostLiveDvr', False)):
            return None, metadata, 'livestream', None

        if 'formats' not in metadata['streamingData']:
            continue # no urls

        formats = metadata['streamingData']['formats']
        for (i,v) in enumerate(formats):
            if not ('cipher' in v or 'signatureCipher' in v): continue
            cipher = parse_qs(v.get('cipher') or v.get('signatureCipher'))
            formats[i]['url'] = unscramble(cipher, algo)

        # todo: check if we have urls or try again
        url = sorted(formats, key=lambda k: k['height'], reverse=True)[0]['url']

        if 'gcr' in parse_qs(url):
            return None, metadata, 'geolocked', None

        return url, metadata, None, None
    else:
        return None, metadata, 'exhausted', player_error

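# Call-site sketch (the video id is the test id mentioned below; the tuple
# shape and error names come from the docstring above):
#
#   url, metadata, error, errdetail = get_video_info("UxxajLWwzqY")
#   if error in ('malformed', 'player', 'exhausted'):
#       ...  # show errdetail to the user
#   elif error in ('livestream', 'geolocked'):
#       ...  # no usable muxed stream; fall back to metadata only
#   else:
#       ...  # url is the best-quality muxed stream (unscrambled via `algo`)
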
def unscramble(cipher, algo): # test video id: UxxajLWwzqY
    signature = list(cipher['s'][0])
    for c in algo.split():
        op, ix = re.match(r"([rsw])(\d+)?", c).groups()
        ix = int(ix) % len(signature) if ix else 0
        if not op: continue
        if op == 'r': signature = list(reversed(signature))
        if op == 's': signature = signature[ix:]
        if op == 'w': signature[0], signature[ix] = signature[ix], signature[0]
    sp = cipher.get('sp', ['signature'])[0]
    sig = cipher.get('sig', [''.join(signature)])[0]
    return f"{cipher['url'][0]}&{sp}={sig}"
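
# Worked example of the algo mini-language (r=reverse, sN=slice off the first N,
# wN=swap position 0 with N); the cipher values below are made up:
#
#   >>> unscramble({'s': ['abcdefg'], 'url': ['https://example.com/video']}, "r w3 s2")
#   'https://example.com/video&signature=egcba'
#   # 'abcdefg' -> reverse -> 'gfedcba' -> swap 0,3 -> 'dfegcba' -> drop 2 -> 'egcba'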

def prepare_metadata(metadata):
    meta1 = metadata['videoDetails']
    meta2 = metadata['microformat']['playerMicroformatRenderer']
    cards = metadata['cards']['cardCollectionRenderer']['cards'] \
        if 'cards' in metadata else []
    endsc = metadata['endscreen']['endscreenRenderer']['elements'] \
        if 'endscreen' in metadata else []

    # the actual video streams have exact information:
    try:
        sd = metadata['streamingData']
        some_stream = (sd.get('adaptiveFormats',[]) + sd.get('formats',[]))[0]
        aspect_ratio = some_stream['width'] / some_stream['height']
    # if that's unavailable (e.g. on livestreams), fall back to
    # thumbnails (only either 4:3 or 16:9).
    except Exception:
        some_img = meta2['thumbnail']['thumbnails'][0]
        aspect_ratio = some_img['width'] / some_img['height']

    # Note: we could get subtitles in multiple formats directly by querying
    # https://video.google.com/timedtext?hl=en&type=list&v=<VIDEO_ID> followed by
    # https://www.youtube.com/api/timedtext?lang=<LANG_CODE>&v=<VIDEO_ID>&fmt={srv1|srv2|srv3|ttml|vtt},
    # but that won't give us autogenerated subtitles (and is an extra request).
    # we can still add &fmt= to the extracted URLs below (the first one takes precedence).
    subtitles = sorted([
        {'url':cc['baseUrl'],
         'code':cc['languageCode'],
         'autogenerated':cc.get('kind')=="asr",
         'name':cc['name']['simpleText'],
         'query':"fmt=vtt&"+urlparse(cc['baseUrl']).query} # for our internal proxy
        for cc in metadata.get('captions',{})
            .get('playerCaptionsTracklistRenderer',{})
            .get('captionTracks',[])
    ], key=lambda cc: cc['autogenerated'])
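
    # Sketch of how the 'query' field can be consumed (the /api/timedtext
    # host/path comes from the extracted baseUrl, and fmt=vtt is prepended
    # above, so an internal subtitle proxy could do something like):
    #
    #   r = requests.get("https://www.youtube.com/api/timedtext?" + subtitles[0]['query'])
    #   vtt_body = r.text  # WebVTT, ready to serve to the browser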

    def clean_url(url):
        # external URLs are redirected through youtube.com/redirect, but we
        # may encounter internal URLs, too
        return parse_qs(urlparse(url).query).get('q',[url])[0]
    # Remove left-/rightmost word from string:
    delL = lambda s: s.partition(' ')[2]
    delR = lambda s: s.rpartition(' ')[0]
    # Thousands separator aware int():
    intT = lambda s: int(s.replace(',', ''))
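
    # Illustrative behaviour of the helpers above (inputs are made up):
    #
    #   clean_url("https://www.youtube.com/redirect?q=https%3A%2F%2Fexample.com%2Fp")
    #       -> "https://example.com/p"   (parse_qs percent-decodes the q parameter)
    #   delL("by Some Channel")  -> "Some Channel"
    #   delR("12,345 views")     -> "12,345"
    #   intT("12,345")           -> 12345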

    def parse_infocard(card):
        card = card['cardRenderer']
        ctype = list(card['content'].keys())[0]
        content = card['content'][ctype]
        if ctype == "pollRenderer":
            ctype = "POLL"
            content = {
                'question': content['question']['simpleText'],
                'answers': [(a['text']['simpleText'],a['numVotes']) \
                    for a in content['choices']],
            }
        elif ctype == "videoInfoCardContentRenderer":
            ctype = "VIDEO"
            # if the card references a live stream, it has no length, but a "LIVE NOW" badge.
            # TODO: this is ugly; cleanup.
            is_live = content.get('badge',{}).get('liveBadgeRenderer',{})
            length = is_live.get('label',{}).get('simpleText') or content['lengthString']['simpleText'] # '23:03'
            content = {
                'video_id': content['action']['watchEndpoint']['videoId'],
                'title': content['videoTitle']['simpleText'],
                'author': delL(content['channelName']['simpleText']),
                'length': length,
                'views': intT(delR(content['viewCountText']['simpleText'])),
            }
        elif ctype == "playlistInfoCardContentRenderer":
            ctype = "PLAYLIST"
            content = {
                'playlist_id': content['action']['watchEndpoint']['playlistId'],
                'video_id': content['action']['watchEndpoint']['videoId'],
                'title': content['playlistTitle']['simpleText'],
                'author': delL(content['channelName']['simpleText']),
                'n_videos': intT(content['playlistVideoCount']['simpleText']),
            }
        elif ctype == "simpleCardContentRenderer" and 'urlEndpoint' in content['command']:
            ctype = "WEBSITE"
            content = {
                'url': clean_url(content['command']['urlEndpoint']['url']),
                'domain': content['displayDomain']['simpleText'],
                'title': content['title']['simpleText'],
                # XXX: no thumbnails for infocards
            }
        elif ctype == "collaboratorInfoCardContentRenderer":
            ctype = "CHANNEL"
            content = {
                'channel_id': content['endpoint']['browseEndpoint']['browseId'],
                'title': content['channelName']['simpleText'],
                'icons': mkthumbs(content['channelAvatar']['thumbnails']),
                'subscribers': content.get('subscriberCountText',{}).get('simpleText',''), # "545K subscribers"
            }
        else:
            import pprint
            content = {'error': f"{ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>"}

        return {'type': ctype, 'content': content}

    def mkthumbs(thumbs):
        return {e['height']: e['url'] for e in thumbs}
    def parse_endcard(card):
        card = card.get('endscreenElementRenderer', card) # only sometimes nested
        ctype = card['style']
        if ctype == "CHANNEL":
            content = {
                'channel_id': card['endpoint']['browseEndpoint']['browseId'],
                'title': card['title']['simpleText'],
                'icons': mkthumbs(card['image']['thumbnails']),
            }
        elif ctype == "VIDEO":
            content = {
                'video_id': card['endpoint']['watchEndpoint']['videoId'], # XXX: KeyError 'endpoint' exception (no idea which youtube video this was on)
                'title': card['title']['simpleText'],
                'length': card['videoDuration']['simpleText'], # '12:21'
                'views': delR(card['metadata']['simpleText']),
                # XXX: no channel name
            }
        elif ctype == "PLAYLIST":
            content = {
                'playlist_id': card['endpoint']['watchEndpoint']['playlistId'],
                'video_id': card['endpoint']['watchEndpoint']['videoId'],
                'title': card['title']['simpleText'],
                'author': delL(card['metadata']['simpleText']),
                'n_videos': intT(delR(card['playlistLength']['simpleText'])),
            }
        elif ctype == "WEBSITE" or ctype == "CREATOR_MERCHANDISE":
            ctype = "WEBSITE"
            url = clean_url(card['endpoint']['urlEndpoint']['url'])
            content = {
                'url': url,
                'domain': urlparse(url).netloc,
                'title': card['title']['simpleText'],
                'icons': mkthumbs(card['image']['thumbnails']),
            }
        else:
            import pprint
            content = {'error': f"{ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>"}

        return {'type': ctype, 'content': content}

    infocards = [parse_infocard(card) for card in cards]
    endcards = [parse_endcard(card) for card in endsc]
    # Combine cards to weed out duplicates. For videos and playlists prefer
    # infocards, for channels and websites prefer endcards, as those carry
    # more information than their counterparts.
    # If the card type is not in ident, we use the whole card for comparison
    # (otherwise they'd all replace each other).
    ident = { # ctype -> ident
        'VIDEO': 'video_id',
        'PLAYLIST': 'playlist_id',
        'CHANNEL': 'channel_id',
        'WEBSITE': 'url',
        'POLL': 'question',
    }
    getident = lambda c: c['content'].get(ident.get(c['type']), c)
    mkexclude = lambda cards, types: [getident(c) for c in cards if c['type'] in types]
    exclude = lambda cards, without: [c for c in cards if getident(c) not in without]

    allcards = exclude(infocards, mkexclude(endcards, ['CHANNEL','WEBSITE'])) + \
               exclude(endcards, mkexclude(infocards, ['VIDEO','PLAYLIST']))
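
    # Worked example of the dedup rules above (made-up ids): if an infocard and
    # an endcard both reference channel_id "UC123", the infocard is dropped
    # (its ident lands in the exclusion set built from CHANNEL/WEBSITE endcards)
    # and the richer endcard stays in allcards; for a VIDEO or PLAYLIST pair it
    # is the endcard that gets dropped and the infocard that stays.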

    all_countries = """AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ BA BB BD
        BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BV BW BY BZ CA CC CD CF CG CH
        CI CK CL CM CN CO CR CU CV CW CX CY CZ DE DJ DK DM DO DZ EC EE EG EH ER
        ES ET FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GS GT
        GU GW GY HK HM HN HR HT HU ID IE IL IM IN IO IQ IR IS IT JE JM JO JP KE
        KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD
        ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC NE NF
        NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PN PR PS PT PW PY QA
        RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR SS ST SV SX
        SY SZ TC TD TF TG TH TJ TK TL TM TN TO TR TT TV TW TZ UA UG UM US UY UZ
        VA VC VE VG VI VN VU WF WS YE YT ZA ZM ZW""".split()
    whitelisted = sorted(meta2.get('availableCountries',[]))
    blacklisted = sorted(set(all_countries) - set(whitelisted))

    published_at = f"{meta2['publishDate']}T00:00:00Z" # yyyy-mm-dd
    # 'premiere' videos (and livestreams?) have an ISO 8601 date available:
    if 'liveBroadcastDetails' in meta2 and 'startTimestamp' in meta2['liveBroadcastDetails']: # TODO: tighten up
        published_at = meta2['liveBroadcastDetails']['startTimestamp']

    return {
        'title': meta1['title'],
        'author': meta1['author'],
        'channel_id': meta1['channelId'],
        'description': meta1['shortDescription'],
        'published': published_at,
        'views': meta1['viewCount'],
        'length': int(meta1['lengthSeconds']),
        'rating': meta1['averageRating'],
        'category': meta2['category'],
        'aspectr': aspect_ratio,
        'unlisted': meta2['isUnlisted'],
        'whitelisted': whitelisted,
        'blacklisted': blacklisted,
        'poster': meta2['thumbnail']['thumbnails'][0]['url'],
        'infocards': infocards,
        'endcards': endcards,
        'all_cards': allcards,
        'subtitles': subtitles,
    }

class RedditException(Exception): pass
def fetch_reddit(subreddits, sorted_by="hot", time=None, *, limit=36,
                 count=None, before=None, after=None):
    """
    fetches data from a subreddit (or a multireddit like gif+gifs) and
    filters/sorts the results.
    sorted_by values: hot, new, rising, controversial, top
    time values: hour, day, week, month, year, all (only for top and controversial)
    """

    if not subreddits:
        return None

    query = {k:v for k,v in {
        'count':count,
        'before':before,
        'after':after,
        'limit':limit, # 1..100 (default 25)
        't': time,     # hour,week,month,year,all
    }.items() if v}
    multireddit = '+'.join(subreddits)
    r = requests.get(f"https://old.reddit.com/r/{multireddit}/{sorted_by}.json",
                     query, headers={'User-Agent':'Mozilla/5.0'})
    if not r.ok or 'data' not in r.json():
        raise RedditException(r.text)

    return r.json()
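
# Illustrative call (the subreddit names are placeholders):
#
#   data = fetch_reddit(['videos', 'mealtimevideos'], sorted_by='top', time='week', limit=50)
#   for post in data['data']['children']:
#       print(post['data']['title'])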

def fetch_reddit_post(post_id):
    # Note: /api/info.json?id=t3_h7mjes == /by_id/t3_h7mjes.json
    r = requests.get(f"https://old.reddit.com/by_id/t3_{post_id}.json",
                     headers={'User-Agent':'Mozilla/5.0'})
    if not r.ok or 'data' not in r.json():
        raise RedditException(r.text)

    return r.json()

def parse_reddit_videos(data):
    videos = []
    entries = sorted(data['data']['children'],
                     key=lambda e: e['data']['score'] > 1,
                     reverse=True)
    for entry in entries:
        e = entry['data']
        if e['domain'] not in ['youtube.com', 'youtu.be', 'invidio.us']:
            continue
        try:
            # Note: youtube.com/<video_id> is not valid (404s), but is seen in the wild.
            video_id = re.match(r'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&)?v=|youtu.be/|youtube.com/embed/|youtube.com/)([-_0-9A-Za-z]+)', e['url']).group(1)
        except Exception:
            continue # XXX: should we log that?
        if not video_id: continue
        videos.append({
            'video_id': video_id,
            'title': html.unescape(e['title']), # Note: we unescape and re-escape in the template
            'url': e['permalink'],
            'n_comments': e['num_comments'],
            'n_karma': e['score'],
            'subreddit': e['subreddit'],
            'post_id': e['id'],
        })

    return videos

class NoFallbackException(Exception): pass
def fallback_route(*args, **kwargs): # TODO: worthy as a flask-extension?
    """
    finds the next route that matches the current url rule, and executes it.
    args, kwargs: pass all arguments of the current route
    """
    from flask import current_app, request, g
    from werkzeug.exceptions import NotFound

    # build a list of endpoints that match the current request's url rule:
    matching = [
        rule.endpoint
        for rule in current_app.url_map.iter_rules()
        if rule.rule == request.url_rule.rule
    ]
    current = matching.index(request.endpoint)

    # since we can't change request.endpoint, we always get the original
    # endpoint back. so for repeated fall-throughs, we use the g object to
    # increment how often we want to fall through.
    if '_fallback_next' not in g:
        g._fallback_next = 0
    g._fallback_next += 1

    next_ep = current + g._fallback_next

    if next_ep < len(matching):
        return current_app.view_functions[matching[next_ep]](*args, **kwargs)
    else:
        raise NoFallbackException
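
# Usage sketch (route and helper names are made up; the pattern is two view
# functions registered on the same rule, where the first defers to the second):
#
#   @app.route('/watch')
#   def watch_internal():
#       if not have_local_copy():      # hypothetical helper
#           return fallback_route()    # falls through to watch_external()
#       ...
#
#   @app.route('/watch')
#   def watch_external():
#       ...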

def websub_url_hmac(key, feed_id, timestamp, nonce):
    """ generate sha1 hmac, as required by websub/pubsubhubbub """
    sig_input = f"{feed_id}:{timestamp}:{nonce}".encode('ascii')
    return hmac.new(key.encode('ascii'), sig_input, hashlib.sha1).hexdigest()

def websub_body_hmac(key, body):
    return hmac.new(key.encode('ascii'), body, hashlib.sha1).hexdigest()
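
# Sketch of checking a websub notification body against its X-Hub-Signature
# header ("sha1=<hexdigest>" is the standard pubsubhubbub header format; the
# `secret`, `request` and `abort` names are assumed to exist in a Flask view):
#
#   expected = "sha1=" + websub_body_hmac(secret, request.get_data())
#   if not hmac.compare_digest(expected, request.headers.get('X-Hub-Signature', '')):
#       abort(403)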

def pp(*args):
    from pprint import pprint
    import sys, codecs
    pprint(args, stream=codecs.getwriter("utf-8")(sys.stderr.buffer))