import re
import html
import requests
class RedditException(Exception):
    """Raised by the fetch helpers when reddit's response is unusable.

    The exception message carries the raw response body.
    """
def fetch_reddit(subreddits, sorted_by="hot", time=None, *, limit=36,
                 count=None, before=None, after=None, timeout=10):
    """
    Fetch a listing from a subreddit (or a multireddit like gif+gifs).

    subreddits: iterable of subreddit names; they are joined with '+'.
    sorted_by values: hot, new, rising, controversial, top
    time values: hour, day, week, month, year, all (for top and controversial)
    limit, count, before, after: passed through as query parameters of the
        same name; falsy values are left out of the request.
    timeout: seconds to wait for reddit before requests gives up.

    Returns the decoded JSON document (a dict containing a 'data' key), or
    None when no subreddits were given.

    Raises RedditException (with the response body as message) when the
    response status is not ok, the body is not JSON, or it has no 'data' key.
    Exceptions raised by requests itself (connection errors, timeouts) are
    not converted and propagate to the caller.
    """
    if not subreddits:
        return None

    # Only truthy values are sent; None/0/'' are dropped from the query.
    query = {k: v for k, v in {
        'count': count,
        'before': before,
        'after': after,
        'limit': limit,  # 1..100 (default 25)
        't': time,  # hour,day,week,month,year,all
    }.items() if v}

    multireddit = '+'.join(subreddits)
    r = requests.get(
        f"https://old.reddit.com/r/{multireddit}/{sorted_by}.json",
        params=query,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,  # without a timeout a stalled connection hangs forever
    )
    if not r.ok:
        raise RedditException(r.text)

    # Decode once; a 2xx response with a non-JSON body (e.g. an HTML error
    # page) must surface as RedditException, not as a decoding error.
    try:
        payload = r.json()
    except ValueError as err:
        raise RedditException(r.text) from err

    if not isinstance(payload, dict) or 'data' not in payload:
        raise RedditException(r.text)
    return payload
def fetch_reddit_post(post_id, *, timeout=10):
    """
    Fetch a single reddit post (a "t3" thing) by its id.

    post_id: the post id without the 't3_' prefix (e.g. 'h7mjes').
    timeout: seconds to wait for reddit before requests gives up.

    Returns the decoded JSON document (a dict containing a 'data' key).

    Raises RedditException (with the response body as message) when the
    response status is not ok, the body is not JSON, or it has no 'data' key.
    Exceptions raised by requests itself (connection errors, timeouts) are
    not converted and propagate to the caller.
    """
    # Note: /api/info.json?id=t3_h7mjes == /by_id/t3_h7mjes.json
    r = requests.get(
        f"https://old.reddit.com/by_id/t3_{post_id}.json",
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,  # without a timeout a stalled connection hangs forever
    )
    if not r.ok:
        raise RedditException(r.text)

    # Decode once; a non-JSON body must surface as RedditException.
    try:
        payload = r.json()
    except ValueError as err:
        raise RedditException(r.text) from err

    if not isinstance(payload, dict) or 'data' not in payload:
        raise RedditException(r.text)
    return payload
def parse_reddit_videos(data):
    """
    Extract YouTube video posts from a decoded reddit listing.

    data: dict with a 'data' key whose value holds a 'children' list of
        posts (e.g. the value returned by fetch_reddit). A None 'data'
        value is treated as an empty listing.

    Returns a list of dicts with the keys video_id, timestamp, title, url,
    n_comments, n_karma, subreddit, post_id and length. Posts with a score
    above 1 come first; within each group the listing order is preserved.
    Posts whose domain is not a known video host, or whose url does not
    contain a recognisable video id, are skipped.

    Raises KeyError if a post lacks one of the fields read unconditionally
    (score, domain, permalink, num_comments, subreddit, id).
    """
    allowed_domains = ('youtube.com', 'youtu.be', 'invidio.us', 'invidious.snopyta.org')
    # NOTE(review): the invidious domains pass the filter above, but the url
    # pattern below only recognises youtube.com / youtu.be hosts, so those
    # posts are currently skipped.
    # Note: youtube.com/ is not valid (404s), but seen in the wild.
    # The start time is accepted both as '?t=' (youtu.be/ID?t=..) and as
    # '&t=' (watch?v=ID&t=..).
    url_re = re.compile(
        r'^https?://(?:www\.|m\.)?'
        r'(?:youtube\.com/watch\?(?:.*&)?v=|youtu\.be/'
        r'|youtube\.com/(?:embed|shorts|live)/|youtube\.com/)'
        r'([-_0-9A-Za-z]+)(?:[?&]t=([0-9hms:]+))?'
    )
    # Leading .* is greedy so the last bracketed duration in the title wins.
    length_re = re.compile(r'.*[\[(](?:00:)?(\d\d?(?::\d\d){1,2})[\])]')

    listing = data['data'] or {}
    # The key is a boolean, and sorting is stable: this only moves posts
    # with score <= 1 to the end, it does not rank by score.
    entries = sorted(listing.get('children', []),
                     key=lambda entry: entry['data']['score'] > 1,
                     reverse=True)

    videos = []
    for entry in entries:
        e = entry['data']
        if e['domain'] not in allowed_domains:
            continue

        # Best effort: silently skip posts without a usable url/title
        # instead of relying on a catch-all exception handler.
        url = e.get('url')
        title = e.get('title')
        if not isinstance(url, str) or not isinstance(title, str):
            continue

        match = url_re.match(url)
        if match is None:
            continue  # XXX: should we log that?
        video_id, timestamp = match.groups()

        length_match = length_re.match(title)
        maybe_length = length_match.group(1) if length_match else None
        if maybe_length:
            # 20:59:00 => 20:59 (we're assuming no video is >10h)
            maybe_length = re.sub(r"([1-9]\d:\d\d):00", r"\1", maybe_length)

        videos.append({
            'video_id': video_id,
            'timestamp': timestamp,
            'title': html.unescape(title),  # Note: we unescape and re-escape in the template
            'url': e['permalink'],
            'n_comments': e['num_comments'],
            'n_karma': e['score'],
            'subreddit': e['subreddit'],
            'post_id': e['id'],
            'length': maybe_length,
        })
    return videos