app/reddit/lib.py

   1 import re
   2 import html
   3 import requests
   4
   5 class RedditException(Exception): pass
   6 def fetch_reddit(subreddits, sorted_by="hot", time=None, *, limit=36,
   7         count=None, before=None, after=None):
   8     """
   9     fetches data from a subreddit (or a multireddit like gif+gifs) and
  10     filters/sorts results.
  11     sorted_by values: hot, new, rising, controversial, top
  12     time values: hour, day, week, month, year, all (for top and controversial)
  13     """
  14
  15     if not subreddits:
  16         return None
  17
  18     query = {k:v for k,v in {
  19         'count':count,
  20         'before':before,
  21         'after':after,
  22         'limit':limit, # 1..100 (default 25)
  23         't': time, # hour,week,month,year,all
  24     }.items() if v}
  25     multireddit = '+'.join(subreddits)
  26     r = requests.get(f"https://old.reddit.com/r/{multireddit}/{sorted_by}.json",
  27             query, headers={'User-Agent':'Mozilla/5.0'})
  28     if not r.ok or not 'data' in r.json():
  29         raise RedditException(r.text)
  30
  31     return r.json()
  32
  33 def fetch_reddit_post(post_id):
  34     # Note: /api/info.json?id=t3_h7mjes == /by_id/t3_h7mjes.json
  35     r = requests.get(f"https://old.reddit.com/by_id/t3_{post_id}.json",
  36             headers={'User-Agent':'Mozilla/5.0'})
  37     if not r.ok or not 'data' in r.json():
  38         raise RedditException(r.text)
  39
  40     return r.json()
  41
  42 def parse_reddit_videos(data):
  43     videos = []
  44     data = data['data'] or {}
  45     entries = sorted(data.get('children',[]),
  46             key=lambda e: e['data']['score'] > 1,
  47             reverse=True)
  48     for entry in entries:
  49         e = entry['data']
  50         if e['domain'] not in ['youtube.com', 'youtu.be', 'invidio.us', 'invidious.snopyta.org']:
  51             continue
  52         try:
  53             # Note: youtube.com/<video_id> is not valid (404s), but seen in the wild.
  54             video_id = re.match(r'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&amp;)?v=|youtu.be/|youtube.com/embed/|youtube.com/)([-_0-9A-Za-z]+)', e['url']).group(1)
  55         except:
  56             continue # XXX: should we log that?
  57         if not video_id: continue
  58         videos.append({
  59             'video_id': video_id,
  60             'title': html.unescape(e['title']), # Note: we unescape and re-escape in the template
  61             'url': e['permalink'],
  62             'n_comments': e['num_comments'],
  63             'n_karma': e['score'],
  64             'subreddit': e['subreddit'],
  65             'post_id': e['id'],
  66         })
  67
  68     return videos