]> git.gir.st - subscriptionfeed.git/blob - app/reddit/lib.py
fix strange exception on reddit api usage
[subscriptionfeed.git] / app / reddit / lib.py
1 import re
2 import html
3 import requests
4
class RedditException(Exception):
    """Raised when a request to reddit fails or yields an unusable response."""
def fetch_reddit(subreddits, sorted_by="hot", time=None, *, limit=36,
                 count=None, before=None, after=None):
    """
    Fetch a listing from a subreddit (or a multireddit like gif+gifs).

    subreddits: iterable of subreddit names, joined with '+' into a single
        multireddit request. A falsy value short-circuits and returns None.
    sorted_by values: hot, new, rising, controversial, top
    time values: hour, day, week, month, year, all (for top and controversial)
    limit: number of posts to request, 1..100 (reddit defaults to 25)
    count, before, after: forwarded unchanged as query parameters.

    Returns the decoded JSON response (a dict containing a 'data' key), or
    None when no subreddits were given.

    Raises RedditException (carrying the raw response text) when the HTTP
    status is not OK, the body is not valid JSON, or the decoded payload is
    not a dict with a 'data' key.
    """

    if not subreddits:
        return None

    # Only send parameters that are actually set; falsy values are dropped.
    candidates = {
        'count': count,
        'before': before,
        'after': after,
        'limit': limit,  # 1..100 (default 25)
        't': time,  # hour,week,month,year,all
    }
    query = {k: v for k, v in candidates.items() if v}
    multireddit = '+'.join(subreddits)
    # NOTE(review): no timeout is passed, so a stalled connection blocks the
    # caller until the remote side gives up.
    r = requests.get(f"https://old.reddit.com/r/{multireddit}/{sorted_by}.json",
                     query, headers={'User-Agent': 'Mozilla/5.0'})
    if not r.ok:
        raise RedditException(r.text)

    # Decode exactly once. A 2xx response with a non-JSON body (e.g. an HTML
    # error page) must surface as RedditException, not as a raw ValueError.
    try:
        listing = r.json()
    except ValueError as err:
        raise RedditException(r.text) from err

    if not isinstance(listing, dict) or 'data' not in listing:
        raise RedditException(r.text)

    return listing
32
def fetch_reddit_post(post_id):
    """
    Fetch a single reddit post by its id (the part after the 't3_' prefix).

    Returns the decoded JSON response (a dict containing a 'data' key).

    Raises RedditException (carrying the raw response text) when the HTTP
    status is not OK, the body is not valid JSON, or the decoded payload is
    not a dict with a 'data' key.
    """
    # Note: /api/info.json?id=t3_h7mjes == /by_id/t3_h7mjes.json
    r = requests.get(f"https://old.reddit.com/by_id/t3_{post_id}.json",
                     headers={'User-Agent': 'Mozilla/5.0'})
    if not r.ok:
        raise RedditException(r.text)

    # Decode exactly once. A 2xx response with a non-JSON body must surface
    # as RedditException, not as a raw ValueError.
    try:
        post = r.json()
    except ValueError as err:
        raise RedditException(r.text) from err

    if not isinstance(post, dict) or 'data' not in post:
        raise RedditException(r.text)

    return post
41
# Link domains (as reported by reddit's 'domain' field) that are treated as
# video hosts. The URL pattern below is built from this tuple so the two
# cannot drift apart.
_VIDEO_DOMAINS = ('youtube.com', 'youtu.be', 'invidio.us', 'invidious.snopyta.org')

# Captures the video id from watch, embed and bare-path URLs on any of the
# hosts above. '&amp;' is matched literally because the listing's URLs are
# HTML-escaped.
# Note: youtube.com/<video_id> is not valid (404s), but seen in the wild.
_VIDEO_URL_RE = re.compile(
    r'^https?://(?:www\.|m\.)?'
    r'(?:' + '|'.join(re.escape(d) for d in _VIDEO_DOMAINS) + r')/'
    r'(?:watch\?(?:.*&amp;)?v=|embed/)?'
    r'([-_0-9A-Za-z]+)'
)


def parse_reddit_videos(data):
    """
    Extract video posts from a reddit listing as returned by fetch_reddit().

    data: dict with a 'data' key whose value is either falsy or a dict that
        may contain a 'children' list of {'data': {...post fields...}}.

    Returns a list of dicts with the keys video_id, title, url, n_comments,
    n_karma, subreddit and post_id. Posts whose domain is not a known video
    host, or whose URL yields no video id, are skipped.
    """
    videos = []
    listing = data['data'] or {}
    # The key is a boolean: posts with a score above 1 are moved ahead of the
    # rest, and because the sort is stable, reddit's own ordering is kept
    # within each of the two groups.
    entries = sorted(listing.get('children', []),
                     key=lambda child: child['data']['score'] > 1,
                     reverse=True)
    for entry in entries:
        e = entry['data']
        if e['domain'] not in _VIDEO_DOMAINS:
            continue
        # A missing or empty URL simply does not match and is skipped.
        match = _VIDEO_URL_RE.match(e.get('url') or '')
        if match is None:
            continue  # XXX: should we log that?
        videos.append({
            'video_id': match.group(1),
            'title': html.unescape(e['title']),  # Note: we unescape and re-escape in the template
            'url': e['permalink'],
            'n_comments': e['num_comments'],
            'n_karma': e['score'],
            'subreddit': e['subreddit'],
            'post_id': e['id'],
        })

    return videos
Imprint / Impressum