app/youtube/lib.py

   1 from urllib.parse import urlparse
   2
   3 from ..common.common import video_metadata
   4 from ..common.innertube import prepare_infocards, prepare_endcards
   5
   6 def prepare_metadata(metadata):
   7     meta1 = metadata['videoDetails']
   8     meta2 = metadata['microformat']['playerMicroformatRenderer']
   9
  10     # the actual video streams have exact information:
  11     try:
  12         sd = metadata['streamingData']
  13         some_stream = (sd.get('adaptiveFormats',[]) + sd.get('formats',[]))[0]
  14         aspect_ratio = some_stream['width'] / some_stream['height']
  15     # if that's unavailable (e.g. on livestreams), fall back to
  16     # thumbnails (only either 4:3 or 16:9).
  17     except:
  18         some_img = meta2['thumbnail']['thumbnails'][0]
  19         aspect_ratio = some_img['width'] / some_img['height']
  20
  21     # Note: we could get subtitles in multiple formats directly by querying
  22     # https://video.google.com/timedtext?hl=en&type=list&v=<VIDEO_ID> followed by
  23     # https://www.youtube.com/api/timedtext?lang=<LANG_CODE>&v=<VIDEO_ID>&fmt={srv1|srv2|srv3|ttml|vtt},
  24     # but that won't give us autogenerated subtitles (and is an extra request).
  25     # we can still add &fmt= to the extracted URLs below (first one takes precedence).
  26     try: # find the native language captions (assuming there is only 1 audioTrack) (any level might not exist):
  27         default_track = metadata.get('captions',{}).get('playerCaptionsTracklistRenderer',{}).get('defaultAudioTrackIndex', 0)
  28         main_subtitle = metadata['captions']['playerCaptionsTracklistRenderer']['audioTracks'][default_track]['defaultCaptionTrackIndex']
  29     except:
  30         main_subtitle = -1
  31     subtitles = sorted([
  32         {'url':cc['baseUrl'],
  33          'code':cc['languageCode'],
  34          'autogenerated':cc.get('kind')=="asr",
  35          'name':cc['name']['simpleText'],
  36          'default':i==main_subtitle,
  37          'query':"fmt=vtt&"+urlparse(cc['baseUrl']).query} # for our internal proxy
  38         for i,cc in enumerate(metadata.get('captions',{})
  39             .get('playerCaptionsTracklistRenderer',{})
  40             .get('captionTracks',[]))
  41     # sort order: default lang gets weight 0 (first), other manually translated weight 1, autogenerated weight 2:
  42     ], key=lambda cc: (not cc['default']) + cc['autogenerated'])
  43
  44     infocards = prepare_infocards(metadata)
  45     endcards = prepare_endcards(metadata)
  46     # combine cards to weed out duplicates. for videos and playlists prefer
  47     # infocards, for channels and websites prefer endcards, as those have more
  48     # information than the other.
  49     # if the card type is not in ident, we use the whole card for comparison
  50     # (otherwise they'd all replace each other)
  51     ident = { # ctype -> ident
  52         'VIDEO': 'video_id',
  53         'PLAYLIST': 'playlist_id',
  54         'CHANNEL': 'channel_id',
  55         'WEBSITE': 'url',
  56         'POLL': 'question',
  57     }
  58     getident = lambda c: c['content'].get(ident.get(c['type']), c)
  59     mkexclude = lambda cards, types: [getident(c) for c in cards if c['type'] in types]
  60     exclude = lambda cards, without: [c for c in cards if getident(c) not in without]
  61
  62     allcards = exclude(infocards, mkexclude(endcards,  ['CHANNEL','WEBSITE'])) + \
  63                exclude(endcards,  mkexclude(infocards, ['VIDEO','PLAYLIST']))
  64
  65     all_countries = """AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ BA BB BD
  66         BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BV BW BY BZ CA CC CD CF CG CH
  67         CI CK CL CM CN CO CR CU CV CW CX CY CZ DE DJ DK DM DO DZ EC EE EG EH ER
  68         ES ET FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GS GT
  69         GU GW GY HK HM HN HR HT HU ID IE IL IM IN IO IQ IR IS IT JE JM JO JP KE
  70         KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD
  71         ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC NE NF
  72         NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PN PR PS PT PW PY QA
  73         RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR SS ST SV SX
  74         SY SZ TC TD TF TG TH TJ TK TL TM TN TO TR TT TV TW TZ UA UG UM US UY UZ
  75         VA VC VE VG VI VN VU WF WS YE YT ZA ZM ZW""".split()
  76     whitelisted = sorted(meta2.get('availableCountries',[]))
  77     blacklisted = sorted(set(all_countries) - set(whitelisted))
  78
  79     return {
  80         **video_metadata(metadata),
  81         'description': meta1['shortDescription'],
  82         'rating': meta1['averageRating'],
  83         'category': meta2['category'],
  84         'aspectr': aspect_ratio,
  85         'unlisted': meta2['isUnlisted'],
  86         'whitelisted': whitelisted,
  87         'blacklisted': blacklisted,
  88         'poster': meta2['thumbnail']['thumbnails'][0]['url'],
  89         'infocards': infocards,
  90         'endcards': endcards,
  91         'all_cards': allcards,
  92         'subtitles': subtitles,
  93     }
  94