# functions that deal with parsing data from youtube's internal API ("innertube")

from urllib.parse import parse_qs, urlparse

def mkthumbs(thumbs):
    """
    turns a list of thumbnail dicts (each with 'height' and 'url') into a
    dict mapping the height (as a string) to the url. the additional key
    'largest' holds the height-key of the tallest thumbnail, or None if the
    list was empty.
    """
    by_height = {}
    for thumb in thumbs:
        by_height[str(thumb['height'])] = thumb['url']
    # compare numerically, so that e.g. '800' beats '88'
    tallest = max(by_height, key=int, default=None)
    result = dict(by_height)
    result['largest'] = tallest
    return result

def clean_url(url):
    """
    returns the target of a youtube.com/redirect style link, i.e. the first
    value of its 'q' query parameter. urls without such a parameter (e.g.
    internal ones) are returned unchanged.
    """
    params = parse_qs(urlparse(url).query)
    if 'q' in params:
        return params['q'][0]
    return url

# Drop the leftmost word (everything up to and including the first space);
# a string without any space yields '':
def delL(s):
    return s.partition(' ')[2]
# Drop the rightmost word (everything from the last space on); a string
# without any space yields '':
def delR(s):
    return s.rpartition(' ')[0]
# int() that tolerates thousands separators ("1,234" -> 1234):
def intT(s):
    return int(s.replace(',', ''))

def parse_result_items(items):
    """
    parses youtube search response into an easier to use format.

    items is a list of single-key dicts of the form {'<kind>Renderer': {...}}.
    returns a flat list of {'type': ..., 'content': {...}} dicts, where type
    is VIDEO, PLAYLIST or CHANNEL. shelves are flattened into the result,
    movies and suggested searches are dropped, and any other renderer yields
    an entry whose type is the raw renderer name and whose content holds an
    'error' message.
    """
    import pprint # only needed for the "not implemented" diagnostics

    results = []
    for item in items:
        key = next(iter(item), None)
        data = item.get(key) # the renderer's payload (None for an empty item)
        if key == 'videoRenderer':
            # a video may carry several badges (e.g. "New", "4K", "LIVE NOW");
            # check all of them, not just the first one.
            is_live = any(
                badge.get('metadataBadgeRenderer', {}).get('style') == 'BADGE_STYLE_TYPE_LIVE_NOW'
                for badge in data.get('badges', [])
            )
            results.append(
                {'type': 'VIDEO', 'content': {
                    'video_id': data['videoId'],
                    'title': data['title']['runs'][0]['text'], # XXX: handle/concat multiple runs?
                    'author': data['longBylineText']['runs'][0]['text'], # OR: shortBylineText
                    'channel_id': data['ownerText']['runs'][0]['navigationEndpoint']['browseEndpoint']['browseId'], # OR: channelThumbnailSupportedRenderers.channelThumbnailWithLinkRenderer.navigationEndpoint.browseId
                    'length': 'LIVE' if is_live else data.get('lengthText',{}).get('simpleText'), # "44:07", "1:41:50" -- XXX: maybe absent--when?
                    'views': data.get('viewCountText',{}).get('simpleText'), # XXX: "123,456 views", absent on livestreams
                    # published: e.g. "1 year ago"; missing on autogenerated
                    # music 'videos', livestreams sometimes "Streamed 7 hours
                    # ago", sometimes absent.
                    'published': data.get('publishedTimeText',{}).get('simpleText',"").replace("Streamed ",""),
                }}
            )
        elif key == 'playlistRenderer':
            results.append(
                {'type': 'PLAYLIST', 'content': {
                    'playlist_id': data['navigationEndpoint']['watchEndpoint']['playlistId'],
                    'video_id': data['navigationEndpoint']['watchEndpoint']['videoId'],
                    'title': data['title']['simpleText'],
                    'author': data['longBylineText']['runs'][0]['text'], # OR: .shortBylineText
                    'channel_id': data['longBylineText']['runs'][0]['navigationEndpoint']['browseEndpoint']['browseId'], # OR .shortBylineText
                    'n_videos': data['videoCount'],
                }}
            )
        elif key == 'radioRenderer':
            # "Mix" playlists
            results.append(
                {'type': 'PLAYLIST', 'content': {
                    'playlist_id': data['playlistId'], # OR: same as normal playlist
                    'video_id': data['navigationEndpoint']['watchEndpoint']['videoId'],
                    'title': data['title']['simpleText'],
                    'author': data['longBylineText']['simpleText'], # always "YouTube"; OR: .shortBylineText
                    'channel_id': None, # xxx: nothing available
                    #'n_videos': data['videoCountText']['runs'][0]['text'], # XXX: "50+ videos"
                    'n_videos': data['videoCountShortText']['runs'][0]['text'], # "50+"
                }}
            )
        elif key == 'channelRenderer':
            results.append(
                {'type': 'CHANNEL', 'content': {
                    'channel_id': data['channelId'],
                    'title': data['title']['simpleText'],
                    'icons': mkthumbs(data['thumbnail']['thumbnails']), # [{url,height,width}]
                    'subscribers': data['subscriberCountText']['simpleText'], # XXX: "2.47K subscribers"
                }}
            )
        elif key == 'shelfRenderer':
            # a shelf is just a nested result list; flatten it into ours
            results.extend(
                parse_result_items(data['content']['verticalListRenderer']['items'])
            )
        elif key == 'movieRenderer':
            # movies to buy/rent
            pass
        elif key == 'horizontalCardListRenderer':
            # suggested searches: .cards[].searchRefinementCardRenderer.query.runs[].text
            pass
        else:
            content = {'error': f"{key} is not implemented; <pre>{pprint.pformat(item)}</pre>"}
            results.append({'type': key, 'content': content})
    return results

def parse_infocard(card):
    """
    parses a single infocard into a format that's easier to handle.

    card is a dict with a 'cardRenderer', whose 'content' holds exactly one
    renderer. returns {'type': ..., 'content': {...}}, where type is POLL,
    VIDEO, PLAYLIST, WEBSITE or CHANNEL. for any other renderer, type is the
    raw renderer name and content holds an 'error' message.
    """
    import logging
    import pprint

    card = card['cardRenderer']
    ctype = list(card['content'].keys())[0]
    content = card['content'][ctype]
    if ctype == "pollRenderer":
        ctype = "POLL"
        content = {
            'question': content['question']['simpleText'],
            'answers': [(a['text']['simpleText'],a['numVotes']) \
                for a in content['choices']],
        }
    elif ctype == "videoInfoCardContentRenderer":
        ctype = "VIDEO"
        # if the card references a live stream, it has no length, but a "LIVE NOW" badge.
        live_badge = content.get('badge',{}).get('liveBadgeRenderer',{})
        length = live_badge.get('label',{}).get('simpleText') or content.get('lengthString',{}).get('simpleText')  # '23:03'
        # viewCountText.simpleText is normally "123,456 views", but may also
        # be absent or contain something like
        # "Starts: July 31, 2020 at 1:30 PM"; fall back to 0 views then.
        view_text = None
        try:
            view_text = content['viewCountText']['simpleText']
            view_count = intT(delR(view_text))
        except (KeyError, TypeError, AttributeError, ValueError):
            # stdlib logging, so that parsing works without a flask app context
            logging.getLogger(__name__).debug(
                "infocard without parsable view count: %r", view_text)
            view_count = 0
        content = {
            'video_id': content['action']['watchEndpoint']['videoId'],
            'title': content['videoTitle']['simpleText'],
            'author': delL(content['channelName']['simpleText']),
            'length': length,
            'views': view_count,
        }
    elif ctype == "playlistInfoCardContentRenderer":
        ctype = "PLAYLIST"
        content = {
            'playlist_id': content['action']['watchEndpoint']['playlistId'],
            'video_id': content['action']['watchEndpoint']['videoId'],
            'title': content['playlistTitle']['simpleText'],
            'author': delL(content['channelName']['simpleText']),
            'n_videos': intT(content['playlistVideoCount']['simpleText']),
        }
    # simple cards without a url (or without any command) are reported as
    # "not implemented" below
    elif ctype == "simpleCardContentRenderer" and 'urlEndpoint' in content.get('command', {}):
        ctype = "WEBSITE"
        content = {
            'url': clean_url(content['command']['urlEndpoint']['url']),
            'domain': content['displayDomain']['simpleText'],
            'title': content['title']['simpleText'],
            # XXX: no thumbnails for infocards
        }
    elif ctype == "collaboratorInfoCardContentRenderer":
        ctype = "CHANNEL"
        content = {
            'channel_id': content['endpoint']['browseEndpoint']['browseId'],
            'title': content['channelName']['simpleText'],
            'icons': mkthumbs(content['channelAvatar']['thumbnails']),
            'subscribers': content.get('subscriberCountText',{}).get('simpleText',''), # "545K subscribers"
        }
    else:
        content = {'error': f"{ctype} is not implemented; <pre>{pprint.pformat(card)}</pre>"}

    return {'type': ctype, 'content': content}

def parse_endcard(card):
    """
    converts one endscreen element into a {'type': ..., 'content': {...}}
    dict. type is the card's style (CREATOR_MERCHANDISE is reported as
    WEBSITE); unknown styles get a content with an 'error' message.
    """
    import pprint

    # the actual card is only sometimes wrapped in an endscreenElementRenderer
    renderer = card.get('endscreenElementRenderer', card)
    style = renderer['style']

    if style == "CHANNEL":
        details = {
            'channel_id': renderer['endpoint']['browseEndpoint']['browseId'],
            'title': renderer['title']['simpleText'],
            'icons': mkthumbs(renderer['image']['thumbnails']),
        }
        return {'type': style, 'content': details}

    if style == "VIDEO":
        details = {
            # XXX: KeyError 'endpoint' exception (no idea which youtube video this was on)
            'video_id': renderer['endpoint']['watchEndpoint']['videoId'],
            'title': renderer['title']['simpleText'],
            'length': renderer['videoDuration']['simpleText'],  # '12:21'
            'views': delR(renderer['metadata']['simpleText']),
            # XXX: no channel name
        }
        return {'type': style, 'content': details}

    if style == "PLAYLIST":
        watch = renderer['endpoint']['watchEndpoint']
        details = {
            'playlist_id': watch['playlistId'],
            'video_id': watch['videoId'],
            'title': renderer['title']['simpleText'],
            'author': delL(renderer['metadata']['simpleText']),
            'n_videos': intT(delR(renderer['playlistLength']['simpleText'])),
        }
        return {'type': style, 'content': details}

    if style in ("WEBSITE", "CREATOR_MERCHANDISE"):
        # merchandise links are treated like any other external website
        target = clean_url(renderer['endpoint']['urlEndpoint']['url'])
        details = {
            'url': target,
            'domain': urlparse(target).netloc,
            'title': renderer['title']['simpleText'],
            'icons': mkthumbs(renderer['image']['thumbnails']),
        }
        return {'type': "WEBSITE", 'content': details}

    details = {'error': f"{style} is not implemented; <pre>{pprint.pformat(renderer)}</pre>"}
    return {'type': style, 'content': details}