import re
import json
import time
import hmac
import hashlib
import sqlite3
import secrets
import requests
import requests_cache
from urllib.parse import parse_qs
from flask import Flask, render_template, request, redirect, flash, url_for, jsonify, g

from common import *

app = Flask(__name__)
app.secret_key = secrets.token_bytes(16)  # XXX: generate and hard-code, or cookies and csrf-validation will fail!

# Note: currently expiring after 10 minutes. googlevideo-urls are valid for 5h59m,
# but this makes reddit very stale and premiere videos won't start.
requests_cache.install_cache(backend='memory', expire_after=10*60, allowable_codes=(200,))

# Note: this should only be required for the 'memory' backed cache.
from threading import Timer
def purge_cache(sec):
    requests_cache.remove_expired_responses()
    t = Timer(sec, purge_cache, args=(sec,))
    t.daemon = True
    t.start()
purge_cache(10*60)


@app.route('/')
def index():
    return redirect(url_for('feed'), code=302)


@app.route('/feed/subscriptions')
def feed():
    token = request.args.get('token', 'guest')
    page = int(request.args.get('page', 0))
    with sqlite3.connect(cf['global']['database']) as conn:
        c = conn.cursor()
        c.execute("""
            SELECT videos.id, channel_id, name, title, published, flags.display
            FROM videos
            JOIN channels ON videos.channel_id = channels.id
            LEFT JOIN flags ON (videos.id = flags.video_id) AND (flags.user = ?)
            WHERE channel_id IN (SELECT channel_id FROM subscriptions WHERE user = ?)
              AND flags.display IS NOT 'hidden'
            ORDER BY (display = 'pinned') DESC, crawled DESC
            LIMIT 36 OFFSET 36*?""", (token, token, page))
        rows = [{
            'video_id': video_id,
            'channel_id': channel_id,
            'author': author,
            'title': title,
            'published': published,
            'pinned': display == 'pinned',
        } for (video_id, channel_id, author, title, published, display) in c.fetchall()]
    return render_template('index.html.j2', rows=rows, page=page)
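# Usage note (derived from the query above): the feed is paginated 36 rows per page
# via the 'page' query parameter (e.g. /feed/subscriptions?token=guest&page=1 returns
# rows 37..72), with pinned videos sorted first and the rest ordered by crawl time.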

@app.route('/watch')
def watch():
    if 'v' not in request.args:
        return "missing video id", 400
    video_id = request.args.get('v')

    (video_url, metadata, error_type, error) = get_video_info(video_id)
    if error_type in ['initial', 'player']:
        return error, 400, {'content-type': 'text/plain', "Link": "; rel=stylesheet;"}

    show = request.args.get("show")
    if show == "metadata":
        # todo: handle the case when we have an 'exhausted' error with no metadata returned
        return render_template('watch.html.j2', video_id=video_id, video_url=video_url, **prepare_metadata(metadata))
    elif show == "json":
        return jsonify(metadata)
    else:
        if error:
            extra = {'geolocked': 'local=1', 'livestream': 'raw=0'}.get(error, '')
            # if error == 'exhausted', metadata.playabilityStatus.reason may contain additional information.
            return f"{error.upper()}: Redirecting to Invidious.", 502, {
                'Refresh': '2; URL=https://invidio.us/watch?v='+video_id+'&'+extra+'&raw=1',
                'content-type': 'text/plain',
                "Link": "; rel=stylesheet;",
            }
        return redirect(video_url, code=307)
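# Usage note (derived from the handler above): /watch?v=VIDEO_ID answers with a 307
# redirect to the best muxed googlevideo stream; adding &show=metadata renders the watch
# page instead, and &show=json dumps the raw player_response. On geolocked/livestream/
# exhausted errors it bounces to invidio.us via a Refresh header.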

def prepare_metadata(metadata):
    meta1 = metadata['videoDetails']
    meta2 = metadata['microformat']['playerMicroformatRenderer']
    cards = metadata['cards']['cardCollectionRenderer']['cards'] if 'cards' in metadata else []
    endsc = metadata['endscreen']['endscreenRenderer']['elements'] if 'endscreen' in metadata else []
    #aspect_ratio = meta2['embed']['width'] / meta2['embed']['height'], # sometimes absent
    aspect_ratio = meta2['thumbnail']['thumbnails'][0]['width'] / meta2['thumbnail']['thumbnails'][0]['height']
    subtitles = sorted([
        {'url': cc['baseUrl'],
         'code': cc['languageCode'],
         'autogenerated': cc.get('kind') == "asr",
         'name': cc['name']['simpleText']}
        for cc in metadata['captions']['playerCaptionsTracklistRenderer']['captionTracks']
    ], key=lambda cc: cc['autogenerated']) if 'captionTracks' in metadata['captions']['playerCaptionsTracklistRenderer'] else []

    def parse_infocard(card):
        card = card['cardRenderer']
        teaser = card['teaser']['simpleCardTeaserRenderer']['message']['simpleText']  # not used
        ctype = list(card['content'].keys())[0]
        content = card['content'][ctype]
        if ctype == "pollRenderer":
            ctype = "POLL"
            content = {
                'question': content['question']['simpleText'],
                'answers': [(a['text']['simpleText'], a['numVotes']) for a in content['choices']],
            }
        elif ctype == "videoInfoCardContentRenderer":
            ctype = "VIDEO"
            content = {
                'video_id': content['action']['watchEndpoint']['videoId'],
                'title': content['videoTitle']['simpleText'],
                'author': content['channelName']['simpleText'],  # 'by xXxXx'
                'length': content['lengthString']['simpleText'],  # '23:03'
                'views': content['viewCountText']['simpleText'],  # '421,248 views'
            }
        elif ctype == "playlistInfoCardContentRenderer":
            ctype = "PLAYLIST"
            content = {
                'playlist_id': content['action']['watchEndpoint']['playlistId'],
                'video_id': content['action']['watchEndpoint']['videoId'],  # XXX: untested
                'title': content['playlistTitle']['simpleText'],
                'author': content['channelName']['simpleText'],
                'n_videos': content['videoCountText']['simpleText'],
            }
        elif ctype == "simpleCardContentRenderer" and 'urlEndpoint' in content.get('command', {}).keys():
            ctype = "WEBSITE"
            content = {
                'url': parse_qs(content['command']['urlEndpoint']['url'].split('?')[1])['q'][0],
                'title': content['title']['simpleText'],
                'text': content['actionButton']['simpleCardButtonRenderer']['text']['simpleText'],
            }
        else:
            import pprint
            content = {'error': f"{ctype} is not implemented; {pprint.pformat(card)}"}
        return {'teaser': teaser, 'type': ctype, 'content': content}

    def parse_endcard(card):
        card = card['endscreenElementRenderer'] if 'endscreenElementRenderer' in card.keys() else card
        ctype = card['style']
        if ctype == "CHANNEL":
            content = {
                'channel_id': card['endpoint']['browseEndpoint']['browseId'],
                'title': card['title']['simpleText'],
                'icons': {e['height']: e['url'] for e in card['image']['thumbnails']},
            }
        elif ctype == "VIDEO":
            content = {
                'video_id': card['endpoint']['watchEndpoint']['videoId'],
                'title': card['title']['simpleText'],
                'length': card['videoDuration']['simpleText'],  # '12:21'
                'views': card['metadata']['simpleText'],  # '51,649 views'
            }
        elif ctype == "PLAYLIST":
            content = {
                'playlist_id': card['endpoint']['watchEndpoint']['playlistId'],
                'video_id': card['endpoint']['watchEndpoint']['videoId'],
                'title': card['title']['simpleText'],
                'author': card['metadata']['simpleText'],
                'n_videos': card['playlistLength']['simpleText'],
            }
        elif ctype == "WEBSITE":
            content = {
                'url': parse_qs(card['endpoint']['urlEndpoint']['url'].split('?')[1])['q'][0],
                'title': card['title']['simpleText'],
                'icons': {e['height']: e['url'] for e in card['image']['thumbnails']},
            }
        else:
            import pprint
            content = {'error': f"{ctype} is not implemented; {pprint.pformat(card)}"}
        return {'type': ctype, 'content': content}

    return {
        'title': meta1['title'],
        'author': meta1['author'],
        'channel_id': meta1['channelId'],
        'description': meta1['shortDescription'],
        'published': meta2['publishDate'],
        'views': meta1['viewCount'],
        'length': int(meta1['lengthSeconds']),
        'rating': meta1['averageRating'],
        'category': meta2['category'],
        'aspectr': aspect_ratio,
        'unlisted': meta2['isUnlisted'],
        'countries': meta2['availableCountries'],
        'infocards': [parse_infocard(card) for card in cards],
        'endcards': [parse_endcard(card) for card in endsc],
        'subtitles': subtitles,
    }


def get_video_info(video_id):
    """
    returns the best-quality muxed video stream, the player_response, error-type/-message
    error types:
        'initial': the request to get_video_info was malformed
        'player': playabilityStatus != OK
        'internal': [livestream, geolocked, exhausted]
    """
    # TODO: caching, e.g. beaker? need to not cache premiering-soon videos/livestreams/etc, though
    #       responses are apparently valid for 6h; maybe cache for (video_length - 2h)
    # TODO: error types? ["invalid parameters", playabilitystatus, own]
    # todo: a bit messy; should return all unscrambled video urls in best->worst quality

    # we try to fetch the video multiple times using different origins
    (sts, algo) = get_cipher()
    for el in ['embedded', 'detailpage']:  # ['el-completely-absent',info,leanback,editpage,adunit,previewpage,profilepage]
        r = requests.get(f"https://www.youtube.com/get_video_info"+
                         f"?video_id={video_id}"+
                         f"&eurl=https://youtube.googleapis.com/v/{video_id}"+
                         f"&el={el}"+
                         f"&sts={sts}"+
                         f"&hl=en_US")  #"&hl=en&gl=US"
        params = parse_qs(r.text)
        if 'errorcode' in params:  # status=fail
            return None, None, 'initial', f"MALFORMED: {params['reason'][0]}"
        metadata = json.loads(params.get('player_response')[0])

        if metadata['playabilityStatus']['status'] != "OK":
            if metadata['playabilityStatus']['status'] == "UNPLAYABLE":
                # try again with a different 'el' value. if none succeeds, we fall into the
                # "exhausted" path, which returns the last tried metadata, from which
                # playabilityStatus.reason can be extracted. according to jwz/youtubedown,
                # the worst error message comes from 'embedded', which is tried first, so it
                # should be overwritten by a better message.
                continue
            return None, None, 'player', f"{metadata['playabilityStatus']['status']}: {metadata['playabilityStatus']['reason']}"
        if 'liveStreamability' in metadata['playabilityStatus']:
            return None, metadata, 'internal', "livestream"  # can also check .microformat.liveBroadcastDetails.isLiveNow

        formats = metadata['streamingData']['formats']
        for (i, v) in enumerate(formats):
            if not ('cipher' in v or 'signatureCipher' in v): continue
            cipher = parse_qs(v.get('cipher') or v.get('signatureCipher'))
            formats[i]['url'] = unscramble(cipher)

        # todo: check if we have urls or try again
        url = sorted(formats, key=lambda k: k['height'], reverse=True)[0]['url']

        if 'gcr' in parse_qs(url):
            return None, metadata, 'internal', "geolocked"

        return url, metadata, None, None
    else:
        return None, metadata, 'internal', "exhausted"


def unscramble(cipher):  # test video id: UxxajLWwzqY
    signature = list(cipher['s'][0])
    (sts, algo) = get_cipher()
    # the algorithm is a space-separated list of ops, e.g. (made-up example) "r s2 w3":
    #   r  -> reverse the signature
    #   sN -> slice off the first N characters
    #   wN -> swap the first character with the one at position N (mod length)
    for c in algo.split():
        op, ix = re.match(r"([rsw])(\d+)?", c).groups()
        if not op: continue
        if op == 'r': signature = list(reversed(signature))
        if op == 's': signature = signature[int(ix):]
        if op == 'w': signature[0], signature[int(ix) % len(signature)] = signature[int(ix) % len(signature)], signature[0]
    sp = cipher.get('sp', ['signature'])[0]
    sig = cipher['sig'][0] if 'sig' in cipher else ''.join(signature)
    return f"{cipher['url'][0]}&{sp}={sig}"


@app.route('/channel/<channel_id>')
def channel(channel_id):
    if not re.match(r"(UC[A-Za-z0-9_-]{22})", channel_id):
        return "bad channel id", 400  # todo
    xmlfeed = fetch_xml("channel_id", channel_id)
    if not xmlfeed:
        return "not found or something", 404  # XXX
    (title, author, _, videos) = parse_xml(xmlfeed)
    return render_template('xmlfeed.html.j2', title=author, rows=videos)


@app.route('/playlist')
def playlist():
    playlist_id = request.args.get('list')
    if not playlist_id:
        return "bad list id", 400  # todo
    xmlfeed = fetch_xml("playlist_id", playlist_id)
    if not xmlfeed:
        return "not found or something", 404  # XXX
    (title, author, _, videos) = parse_xml(xmlfeed)
    return render_template('xmlfeed.html.j2', title=f"{title} by {author}", rows=videos)


@app.route('/subscription_manager')
def subscription_manager():
    token = request.args.get('token', 'guest')
    with sqlite3.connect(cf['global']['database']) as conn:
        #with conn.cursor() as c:
        c = conn.cursor()
        c.execute("""
            SELECT subscriptions.channel_id, name,
                   (subscribed_until < datetime('now')) AS obsolete
            FROM subscriptions
            LEFT JOIN channels ON channels.id = subscriptions.channel_id
            LEFT JOIN websub ON channels.id = websub.channel_id
            WHERE user = ?
            ORDER BY obsolete=0, name COLLATE NOCASE ASC""", (token,))
        rows = [{
            'channel_id': channel_id,
            'author': author or channel_id,
            'subscribed_until': subscribed_until,
        } for (channel_id, author, subscribed_until) in c.fetchall()]
    return render_template('subscription_manager.html.j2', rows=rows)


@app.route('/feed/subscriptions', methods=['POST'])
def feed_post():
    token = request.args.get('token', 'guest')
    if token == 'guest':
        return "guest user is read-only", 403
    action = next(iter(request.form.keys()), None)
    if action in ['pin', 'unpin', 'hide']:
        video_id = request.form.get(action)
        display = {
            'pin': 'pinned',
            'unpin': None,
            'hide': 'hidden',
        }[action]
        with sqlite3.connect(cf['global']['database']) as conn:
            #with conn.cursor() as c:
            c = conn.cursor()
            c.execute("""
                INSERT OR REPLACE INTO flags (user, video_id, display)
                VALUES (?, ?, ?)
            """, (token, video_id, display))
    else:
        flash(("error", "unsupported action"))

    return redirect(request.url, code=303)


@app.route('/subscription_manager', methods=['POST'])
def manage_subscriptions():
    token = request.args.get('token', 'guest')
    if token == 'guest':
        return "guest user is read-only", 403

    if 'subscribe' in request.form:
        channel_id = request.form.get("subscribe")
        match = re.match(r"(UC[A-Za-z0-9_-]{22})", channel_id)
        if match:
            channel_id = match.group(1)
        else:
            match = re.match(r"((?:PL|LL|EC|UU|FL|UL|OL)[A-Za-z0-9_-]{10,})", channel_id)
            if match:  # NOTE: PL-playlists are 32chars, others differ in length.
                flash(("error", "playlists not (yet?) supported."))
                return redirect(request.url, code=303)  # TODO: dedup redirection
            else:
                flash(("error", "not a valid/subscribable URI"))
                return redirect(request.url, code=303)  # TODO: dedup redirection
        with sqlite3.connect(cf['global']['database']) as conn:
            #with conn.cursor() as c:
            c = conn.cursor()
            c.execute("""
                INSERT OR IGNORE INTO subscriptions (user, channel_id)
                VALUES (?, ?)
            """, (token, channel_id))
            # TODO: sql-error-handling, asynchronously calling update-subs.pl
    elif 'unsubscribe' in request.form:
        channel_id = request.form.get("unsubscribe")
        with sqlite3.connect(cf['global']['database']) as conn:
            #with conn.cursor() as c:
            c = conn.cursor()
            c.execute("""
                DELETE FROM subscriptions
                WHERE user = ? AND channel_id = ?
            """, (token, channel_id))
            # TODO: sql-error-handling, report success
    else:
        flash(("error", "unsupported action"))

    return redirect(request.url, code=303)


@app.route('/r/')
def reddit_index():
    return ""


@app.route('/r/<subreddit>')
def reddit(subreddit="videos"):
    count = int(request.args.get('count', 0))
    before = request.args.get('before')
    after = request.args.get('after')
    query = '&'.join([f"{k}={v}" for k, v in [('count', count), ('before', before), ('after', after)] if v])
    r = requests.get(f"https://old.reddit.com/r/{subreddit}.json?{query}", headers={'User-Agent': 'Mozilla/5.0'})
    if not r.ok or 'data' not in r.json():
        return r.text+"error retrieving reddit data", 502

    good = [e for e in r.json()['data']['children'] if e['data']['score'] > 1]
    bad = [e for e in r.json()['data']['children'] if e['data']['score'] <= 1]
    videos = []
    for entry in (good+bad):
        e = entry['data']
        if e['domain'] not in ['youtube.com', 'youtu.be', 'invidio.us']:
            continue
        match = re.match(r'^https?://(?:www\.|m\.)?(?:youtube\.com/watch\?(?:.*&)?v=|youtu\.be/|youtube\.com/embed/)([-_0-9A-Za-z]+)', e['url'])
        if not match:
            continue
        video_id = match.group(1)
        videos.append({
            'video_id': video_id,
            'title': e['title'],
            'url': e['permalink'],
            'n_comments': e['num_comments'],
            'n_karma': e['score'],
        })

    before = r.json()['data']['before']
    after = r.json()['data']['after']
    return render_template('reddit.html.j2', subreddit=subreddit, rows=videos, before=before, after=after, count=count)


def get_cipher():
    # reload cipher from database every 1 hour
    if 'cipher' not in g or time.time() - g.get('cipher_updated', 0) > 1 * 60 * 60:
        with sqlite3.connect(cf['global']['database']) as conn:
            c = conn.cursor()
            c.execute("SELECT sts, algorithm FROM cipher")
            g.cipher = c.fetchone()
            g.cipher_updated = time.time()
    return g.cipher


#@app.teardown_appcontext
#def teardown_db():
#    db = g.pop('db', None)
#
#    if db is not None:
#        db.close()


# Magic CSRF protection: This modifies outgoing HTML responses and injects a csrf token into all forms.
# All post requests are then checked if they contain the valid token.
# TODO:
# - don't use regex for injecting
# - inject an http header into all responses (that could be used by apis)
# - allow csrf token to be passed in http header, json, ...
# - a decorator on routes to opt out of verification or output munging
@app.after_request
def add_csrf_protection(response):
    if response.mimetype == "text/html":
        token = hmac.new(app.secret_key, request.remote_addr.encode('ascii'), hashlib.sha256).hexdigest()  # TODO: will fail behind reverse proxy (remote_addr always localhost)
        response.set_data(re.sub(
            rb'(<form\b[^>]*>)',  # match form tags with any number of attributes and any type of quotes
            rb'\1<input type="hidden" name="csrf" value="' + token.encode('ascii') + rb'"/>',  # hackily append a hidden input with our csrf protection value
            response.get_data()))
    return response


@app.before_request
def verify_csrf_protection():
    token = hmac.new(app.secret_key, request.remote_addr.encode('ascii'), hashlib.sha256).hexdigest()  # TODO: will fail behind reverse proxy (remote_addr always localhost)
    if request.method == "POST" and request.form.get('csrf') != token:
        return "CSRF validation failed!", 400


@app.template_filter('format_date')
def format_date(s):
    (y, m, d) = (int(n) for n in s.split('T')[0].split(' ')[0].split('-'))  # iso-dates can separate date from time with space or 'T'
    M = '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
    return f"{d} {M[m]}"


def pp(*args):
    from pprint import pprint
    import sys, codecs
    pprint(args, stream=codecs.getwriter("utf-8")(sys.stderr.buffer))


if __name__ == '__main__':
    app.run(debug=True)