import re
import html
import requests

class RedditException(Exception):
    """Raised when a Reddit request fails or returns an unexpected payload."""
def fetch_reddit(subreddits, sorted_by="hot", time=None, *, limit=36,
        count=None, before=None, after=None):
    """
    Fetch a listing from a subreddit (or a multireddit like gif+gifs).

    subreddits: iterable of subreddit names (joined with '+'); a single
        name given as a plain string is accepted as well.
    sorted_by values: hot, new, rising, controversial, top
    time values: hour, day, week, month, year, all (for top and controversial)
    limit, count, before, after: forwarded as query parameters when truthy.

    Returns the decoded JSON object (a dict with a 'data' key), or None when
    no subreddits were given.

    Raises RedditException (carrying the response text) when the response
    status is not ok, the body is not valid JSON, or it has no 'data' key.
    Exceptions raised by requests itself (connection errors, timeouts)
    are not wrapped and propagate to the caller.
    """

    if not subreddits:
        return None
    if isinstance(subreddits, str):
        # '+'.join() on a bare string would join its characters ("gifs" -> "g+i+f+s").
        subreddits = [subreddits]

    # Only send parameters that were actually set (falsy values are dropped).
    query = {k: v for k, v in {
        'count': count,
        'before': before,
        'after': after,
        'limit': limit, # 1..100 (default 25)
        't': time, # hour,day,week,month,year,all
    }.items() if v}
    multireddit = '+'.join(subreddits)
    # Without a timeout requests waits indefinitely on a stalled connection.
    r = requests.get(f"https://old.reddit.com/r/{multireddit}/{sorted_by}.json",
            params=query, headers={'User-Agent': 'Mozilla/5.0'}, timeout=30)

    # Decode the body once; a non-JSON body is reported like any other
    # unusable response instead of leaking a decoding error.
    try:
        payload = r.json() if r.ok else None
    except ValueError:
        payload = None
    if not isinstance(payload, dict) or 'data' not in payload:
        raise RedditException(r.text)

    return payload

def fetch_reddit_post(post_id):
    """
    Fetch a single post by id.

    post_id: the post id without the "t3_" prefix (it is added here).

    Returns the decoded JSON object (a dict with a 'data' key).

    Raises RedditException (carrying the response text) when the response
    status is not ok, the body is not valid JSON, or it has no 'data' key.
    Exceptions raised by requests itself (connection errors, timeouts)
    are not wrapped and propagate to the caller.
    """
    # Note: /api/info.json?id=t3_h7mjes == /by_id/t3_h7mjes.json
    # Without a timeout requests waits indefinitely on a stalled connection.
    r = requests.get(f"https://old.reddit.com/by_id/t3_{post_id}.json",
            headers={'User-Agent': 'Mozilla/5.0'}, timeout=30)

    # Decode the body once; a non-JSON body is reported like any other
    # unusable response instead of leaking a decoding error.
    try:
        payload = r.json() if r.ok else None
    except ValueError:
        payload = None
    if not isinstance(payload, dict) or 'data' not in payload:
        raise RedditException(r.text)

    return payload

# Values of a post's 'domain' field that may carry a YouTube video.
# 'm.youtube.com' is included because the URL pattern below accepts the
# "m." host prefix (presumably that is the domain reported for mobile links).
_VIDEO_DOMAINS = frozenset((
    'youtube.com', 'm.youtube.com', 'youtu.be',
    'invidio.us', 'invidious.snopyta.org',
))

# Extracts the video id (group 1) from the supported URL shapes.
# The url may or may not be HTML-escaped, so both "&" and "&amp;" are accepted.
# Note: youtube.com/<video_id> is not valid (404s), but seen in the wild; it
# is only accepted when the path segment is exactly 11 id characters long, so
# that /playlist, /channel, /results or /watch without v= are not mistaken
# for a video id.
_VIDEO_URL_RE = re.compile(r'''
    ^https?://(?:www\.|m\.)?
    (?:
        (?:youtube\.com|invidio\.us|invidious\.snopyta\.org)/
        (?:
            watch\?(?:.*&(?:amp;)?)?v=
          | (?:embed|shorts|live)/
          | (?=[-_0-9A-Za-z]{11}(?![-_0-9A-Za-z]))
        )
      | youtu\.be/
    )
    ([-_0-9A-Za-z]+)
''', re.VERBOSE)

# Start offset: a "t" parameter anywhere in the query string or fragment.
_TIMESTAMP_RE = re.compile(r'[?&#](?:amp;)?t=([0-9hms:]+)')

# A duration in brackets or parentheses inside the title, e.g. "[12:34]" or
# "(1:02:03)". The leading .* makes the last occurrence in the line win.
_LENGTH_RE = re.compile(r'.*[\[(](?:00:)?(\d\d?(?::\d\d){1,2})[\])]')


def parse_reddit_videos(data):
    """
    Extract the YouTube videos from a listing as returned by fetch_reddit().

    data: decoded listing (dict with data -> children -> [{'data': post}]);
        None or a listing without children yields an empty list.

    Posts with a score above 1 come first; within each group the order of
    the listing is kept. Posts whose domain is not a known video host, or
    whose url holds no recognizable video id, are skipped.

    Returns a list of dicts with the keys video_id, timestamp (start offset
    from the url, or None), title (HTML-unescaped), url (the permalink),
    n_comments, n_karma, subreddit, post_id and length (duration found in
    the title, or None).
    """
    listing = (data or {}).get('data') or {}
    # The key is a boolean, so this only partitions the posts: the sort is
    # stable (also with reverse=True) and keeps the incoming order otherwise.
    entries = sorted(listing.get('children') or [],
            key=lambda child: child['data']['score'] > 1,
            reverse=True)

    videos = []
    for entry in entries:
        e = entry['data']
        if e.get('domain') not in _VIDEO_DOMAINS:
            continue

        url, title = e.get('url'), e.get('title')
        if not isinstance(url, str) or not isinstance(title, str):
            continue # XXX: should we log that?

        id_match = _VIDEO_URL_RE.match(url)
        if id_match is None:
            continue # XXX: should we log that?

        ts_match = _TIMESTAMP_RE.search(url)
        timestamp = ts_match.group(1) if ts_match else None

        length_match = _LENGTH_RE.match(title)
        maybe_length = length_match.group(1) if length_match else None
        if maybe_length:
            # 20:59:00 => 20:59 (we're assuming no video is >10h)
            maybe_length = re.sub(r"([1-9]\d:\d\d):00", r"\1", maybe_length)

        videos.append({
            'video_id': id_match.group(1),
            'timestamp': timestamp,
            'title': html.unescape(title), # Note: we unescape and re-escape in the template
            'url': e['permalink'],
            'n_comments': e['num_comments'],
            'n_karma': e['score'],
            'subreddit': e['subreddit'],
            'post_id': e['id'],
            'length': maybe_length,
        })

    return videos