app/youtube/lib.py

   1 import re
   2 import requests
   3 from urllib.parse import urlparse
   4
   5 from ..common.common import video_metadata
   6 from ..common.innertube import prepare_infocards, prepare_endcards, G
   7
   8 def prepare_metadata(metadata):
   9     meta = metadata['videoDetails']
  10
  11     # the actual video streams have exact information:
  12     try:
  13         sd = metadata['streamingData']
  14         some_stream = (sd.get('adaptiveFormats',[]) + sd.get('formats',[]))[0]
  15         aspect_ratio = some_stream['width'] / some_stream['height']
  16     # if that's unavailable (e.g. on livestreams), fall back to 16:9
  17     except:
  18         aspect_ratio = 16/9
  19
  20     # Note: we could get subtitles in multiple formats directly by querying
  21     # https://video.google.com/timedtext?hl=en&type=list&v=<VIDEO_ID> followed by
  22     # https://www.youtube.com/api/timedtext?lang=<LANG_CODE>&v=<VIDEO_ID>&fmt={srv1|srv2|srv3|ttml|vtt},
  23     # but that won't give us autogenerated subtitles (and is an extra request).
  24     # we can still add &fmt= to the extracted URLs below (first one takes precedence).
  25     try: # find the native language captions (assuming there is only 1 audioTrack) (any level might not exist):
  26         default_track = metadata.get('captions',{}).get('playerCaptionsTracklistRenderer',{}).get('defaultAudioTrackIndex', 0)
  27         main_subtitle = metadata['captions']['playerCaptionsTracklistRenderer']['audioTracks'][default_track]['captionTrackIndices']
  28     except:
  29         main_subtitle = -1
  30     subtitles = sorted([
  31         {'url':cc['baseUrl'],
  32          'code':cc['languageCode'],
  33          'autogenerated':cc.get('kind')=="asr",
  34          'name':cc['name']|G.text,
  35          'default':i==main_subtitle,
  36          'query':"fmt=vtt&"+urlparse(cc['baseUrl']).query} # for our internal proxy
  37         for i,cc in enumerate(metadata|G('captions')
  38             |G('playerCaptionsTracklistRenderer')
  39             |G('captionTracks') or [])
  40     # sort order: default lang gets weight 0 (first), other manually translated weight 1, autogenerated weight 2:
  41     ], key=lambda cc: (not cc['default']) + cc['autogenerated'])
  42
  43     endcards = prepare_endcards(metadata)
  44
  45     # the rating goes from 1 to 5, and is the ratio of up- to down votes, plus 1
  46     if meta.get('averageRating', 0) != 0:
  47         thumbs_up = 100 * (meta['averageRating']-1) / 4  # reconstructed ratio
  48         thumbs_dn = 100 - thumbs_up
  49     else:  # no thumbs given
  50         thumbs_up = 0
  51         thumbs_dn = 0
  52
  53     thumbs = meta['thumbnail']['thumbnails']
  54     poster = sorted(thumbs, key=lambda t: t['width'], reverse=True)[0]['url']
  55
  56     return {
  57         **video_metadata(metadata),
  58         'description': meta['shortDescription'],
  59         'rating': meta.get('averageRating', 0),
  60         'thumbs_up': thumbs_up,
  61         'thumbs_dn': thumbs_dn,
  62         'aspectr': aspect_ratio,
  63         'unlisted': not meta['isCrawlable'],
  64         'poster': poster,
  65         'endcards': endcards,
  66         'all_cards': endcards,
  67         'subtitles': subtitles,
  68     }
  69
  70 def channel_exists(feed_id):
  71     feed_type = "channel_id" if re.match(r"^UC[A-Za-z0-9_-]{22}$", feed_id) else "user"
  72     r = requests.head("https://www.youtube.com/feeds/videos.xml", params={
  73         feed_type: feed_id,
  74     })
  75     return r.ok