import re
import html

import requests

# Compiled once (used per-entry in parse_reddit_videos).
# Dots are escaped so e.g. "youtubeXcom" no longer matches.
# Note: youtube.com/<id> is not valid (404s), but seen in the wild.
_YOUTUBE_ID_RE = re.compile(
    r'^https?://(?:www\.|m\.)?'
    r'(?:youtube\.com/watch\?(?:.*&)?v=|youtu\.be/|youtube\.com/embed/|youtube\.com/)'
    r'([-_0-9A-Za-z]+)'
)

# Reddit rejects requests without a browser-like User-Agent.
_HEADERS = {'User-Agent': 'Mozilla/5.0'}


class RedditException(Exception):
    """Raised when reddit returns a non-OK response or a payload without 'data'."""


def fetch_reddit(subreddits, sorted_by="hot", time=None, *, limit=36, count=None, before=None, after=None):
    """
    Fetch a listing from a subreddit (or a multireddit like gif+gifs).

    Parameters:
        subreddits: iterable of subreddit names; joined with '+' into a multireddit.
        sorted_by: hot, new, rising, controversial, top.
        time: hour, day, week, month, year, all (only for top and controversial).
        limit: entries per page, 1..100 (reddit's own default is 25).
        count, before, after: reddit listing pagination parameters.

    Returns the decoded JSON listing, or None when subreddits is empty.
    Raises RedditException on a non-OK response or a payload without 'data'.
    """
    if not subreddits:
        return None
    # Drop unset (falsy) parameters so they are not sent at all.
    query = {k: v for k, v in {
        'count': count,
        'before': before,
        'after': after,
        'limit': limit,
        't': time,
    }.items() if v}
    multireddit = '+'.join(subreddits)
    r = requests.get(
        f"https://old.reddit.com/r/{multireddit}/{sorted_by}.json",
        query,
        headers=_HEADERS,
    )
    if not r.ok:
        raise RedditException(r.text)
    payload = r.json()  # parse once instead of re-parsing per access
    if 'data' not in payload:
        raise RedditException(r.text)
    return payload


def fetch_reddit_post(post_id):
    """
    Fetch a single reddit post by its base36 id (without the 't3_' prefix).

    Raises RedditException on a non-OK response or a payload without 'data'.
    """
    # Note: /api/info.json?id=t3_h7mjes == /by_id/t3_h7mjes.json
    r = requests.get(
        f"https://old.reddit.com/by_id/t3_{post_id}.json",
        headers=_HEADERS,
    )
    if not r.ok:
        raise RedditException(r.text)
    payload = r.json()  # parse once instead of re-parsing per access
    if 'data' not in payload:
        raise RedditException(r.text)
    return payload


def parse_reddit_videos(data):
    """
    Extract YouTube videos from a reddit listing as returned by fetch_reddit().

    Entries from non-YouTube domains, or whose URL yields no video id, are
    skipped. Returns a list of dicts with keys: video_id, title, url,
    n_comments, n_karma, subreddit, post_id.
    """
    videos = []
    # Boolean sort key: a stable partition that floats posts with score > 1
    # to the front while preserving reddit's own ordering within each group.
    entries = sorted(data['data']['children'],
                     key=lambda e: e['data']['score'] > 1, reverse=True)
    for entry in entries:
        e = entry['data']
        if e['domain'] not in ('youtube.com', 'youtu.be', 'invidio.us'):
            continue
        m = _YOUTUBE_ID_RE.match(e['url'])
        if not m:
            continue  # XXX: should we log that?
        videos.append({
            'video_id': m.group(1),
            'title': html.unescape(e['title']),  # Note: we unescape and re-escape in the template
            'url': e['permalink'],
            'n_comments': e['num_comments'],
            'n_karma': e['score'],
            'subreddit': e['subreddit'],
            'post_id': e['id'],
        })
    return videos