]>
git.gir.st - subscriptionfeed.git/blob - app/reddit/lib.py
class RedditException(Exception):
    """Raised when a reddit request fails or its JSON response lacks 'data'."""
6 def fetch_reddit ( subreddits
, sorted_by
= "hot" , time
= None , *, limit
= 36 ,
7 count
= None , before
= None , after
= None ):
9 fetches data from a subreddit (or a multireddit like gif+gifs) and
10 filters/sorts results.
11 sorted_by values: hot, new, rising, controversial, top
12 time values: hour, day, week, month, year, all (for top and controversial)
18 query
= { k
: v
for k
, v
in {
22 'limit' : limit
, # 1..100 (default 25)
23 't' : time
, # hour,week,month,year,all
25 multireddit
= '+' . join ( subreddits
)
26 r
= requests
. get ( f
"https://old.reddit.com/r/ {multireddit} / {sorted_by} .json" ,
27 query
, headers
={ 'User-Agent' : 'Mozilla/5.0' })
28 if not r
. ok
or not 'data' in r
. json ():
29 raise RedditException ( r
. text
)
def fetch_reddit_post(post_id):
    """
    fetches the listing for a single reddit post.

    post_id is the bare post id without the 't3_' type prefix (e.g.
    'h7mjes'); the prefix is added here.

    raises RedditException (carrying the response body) when the HTTP
    request was not successful or the decoded JSON has no 'data' key.
    """
    # Note: /api/info.json?id=t3_h7mjes == /by_id/t3_h7mjes.json
    r = requests.get(f"https://old.reddit.com/by_id/t3_{post_id}.json",
        headers={'User-Agent': 'Mozilla/5.0'})
    # Only decode the body once we know the request succeeded; error pages
    # are not guaranteed to be JSON.
    if not r.ok:
        raise RedditException(r.text)
    payload = r.json()
    if 'data' not in payload:
        raise RedditException(r.text)

    # NOTE(review): the original return statement is not visible in this
    # extract; returning the decoded payload is assumed -- confirm against
    # the callers.
    return payload
42 def parse_reddit_videos ( data
):
44 data
= data
[ 'data' ] or {}
45 entries
= sorted ( data
. get ( 'children' ,[]),
46 key
= lambda e
: e
[ 'data' ][ 'score' ] > 1 ,
50 if e
[ 'domain' ] not in [ 'youtube.com' , 'youtu.be' , 'invidio.us' , 'invidious.snopyta.org' ]:
53 # Note: youtube.com/<video_id> is not valid (404s), but seen in the wild.
54 match
= re
. match ( r
'^https?://(?:www.|m.)?(?:youtube.com/watch\?(?:.*&)?v=|youtu.be/|youtube.com/embed/|youtube.com/)([-_0-9A-Za-z]+)(?:[?&#]t=([0-9hms:]+))?' , e
[ 'url' ])
55 video_id
= match
. group ( 1 )
56 timestamp
= match
. group ( 2 )
57 maybe_length
= re
. match ( r
'.*[\[(](?:00:)?(\d\d?(?::\d\d){1,2})[\])]' , e
[ 'title' ]) # .* to match last occurence in line (probably terrible for performance?)
58 maybe_length
= maybe_length
. group ( 1 ) if maybe_length
else None
60 # 20:59:00 => 20:59 (we're assuming no video is >10h)
61 maybe_length
= re
. sub ( r
"([1-9]\d:\d\d):00" , r
"\1" , maybe_length
)
63 continue # XXX: should we log that?
64 if not video_id
: continue
67 'timestamp' : timestamp
,
68 'title' : html
. unescape ( e
[ 'title' ]), # Note: we unescape and re-escape in the template
69 'url' : e
[ 'permalink' ],
70 'n_comments' : e
[ 'num_comments' ],
71 'n_karma' : e
[ 'score' ],
72 'subreddit' : e
[ 'subreddit' ],
74 'length' : maybe_length
,