import re
import requests
from urllib.parse import urlparse

from .cards import prepare_infocards, prepare_endcards
from ..common.common import video_metadata
from ..common.common import G

def prepare_metadata(metadata):
    meta = metadata['videoDetails']

    # Note: we could get subtitles in multiple formats directly by querying
    # https://video.google.com/timedtext?hl=en&type=list&v=<VIDEO_ID> followed by
    # https://www.youtube.com/api/timedtext?lang=<LANG_CODE>&v=<VIDEO_ID>&fmt={srv1|srv2|srv3|ttml|vtt},
    # but that won't give us autogenerated subtitles (and is an extra request).
    # we can still add &fmt= to the extracted URLs below (first one takes precedence).
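    # a rough, untested sketch of those two requests (not used by this code;
    # video_id and lang_code are assumed to be the 11-character video id and a
    # language code taken from the returned track list, respectively):
    #   tracklist = requests.get("https://video.google.com/timedtext",
    #                            params={'hl': 'en', 'type': 'list', 'v': video_id})
    #   track_vtt = requests.get("https://www.youtube.com/api/timedtext",
    #                            params={'lang': lang_code, 'v': video_id, 'fmt': 'vtt'})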
    try: # find the native language captions (assuming there is only 1 audioTrack) (any level might not exist):
        default_track = metadata.get('captions',{}).get('playerCaptionsTracklistRenderer',{}).get('defaultAudioTrackIndex', 0)
        main_subtitle = metadata['captions']['playerCaptionsTracklistRenderer']['audioTracks'][default_track]['captionTrackIndices']
    except:
        main_subtitle = -1
    subtitles = sorted([
        {'url':cc['baseUrl'],
         'code':cc['languageCode'],
         'autogenerated':cc.get('kind')=="asr",
         'name':cc['name']|G.text,
         'default':i==main_subtitle,
         'query':"fmt=vtt&"+urlparse(cc['baseUrl']).query} # for our internal proxy
        for i,cc in enumerate(metadata|G('captions')
                                      |G('playerCaptionsTracklistRenderer')
                                      |G('captionTracks') or [])
        # sort order: default lang gets weight 0 (first), other manually translated weight 1, autogenerated weight 2:
    ], key=lambda cc: (not cc['default']) + cc['autogenerated'])

    endcards = prepare_endcards(metadata)

    thumbs = meta['thumbnail']['thumbnails']
    poster = sorted(thumbs, key=lambda t: t['width'], reverse=True)[0]['url']

    return {
        **video_metadata(metadata),
        'description': meta['shortDescription'],
        'unlisted': not meta['isCrawlable'],
        'poster': poster,
        'endcards': endcards,
        'all_cards': endcards,
        'subtitles': subtitles,
    }

def channel_exists(feed_id):
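    """ checks whether a channel id (UC…) or a legacy /user/ name exists, by
        issuing a HEAD request against youtube's public RSS feed endpoint. """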
    feed_type = "channel_id" if re.match(r"^UC[A-Za-z0-9_-]{22}$", feed_id) else "user"
    r = requests.head("https://www.youtube.com/feeds/videos.xml", params={
        feed_type: feed_id,
    })
    return r.ok

def microformat_parser(metadata):
    """ parses additional metadata only available with get_video_info(metaOnly=True) """
    # WARN: breaks if metadata == None (e.g. invalid video id)
    meta2 = metadata.get('microformat',{}).get('playerMicroformatRenderer',{})
    all_countries = """AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ BA BB BD
        BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BV BW BY BZ CA CC CD CF CG CH
        CI CK CL CM CN CO CR CU CV CW CX CY CZ DE DJ DK DM DO DZ EC EE EG EH ER
        ES ET FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GS GT
        GU GW GY HK HM HN HR HT HU ID IE IL IM IN IO IQ IR IS IT JE JM JO JP KE
        KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD
        ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC NE NF
        NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PN PR PS PT PW PY QA
        RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR SS ST SV SX
        SY SZ TC TD TF TG TH TJ TK TL TM TN TO TR TT TV TW TZ UA UG UM US UY UZ
        VA VC VE VG VI VN VU WF WS YE YT ZA ZM ZW""".split()
    whitelisted = sorted(meta2.get('availableCountries',[]))
    blacklisted = sorted(set(all_countries) - set(whitelisted))
    regions = (
        'all' if not blacklisted else
        'none' if not whitelisted else
        f"not in {' '.join(blacklisted)}" if len(blacklisted) < len(whitelisted) else
        f"only in {' '.join(whitelisted)}"
    )
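    # e.g. a video blocked only in DE yields "not in DE", one whitelisted for just
    # CA and US yields "only in CA US", and one listing every code above yields "all".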
    try:
        poster = sorted(meta2['thumbnail']['thumbnails'], key=lambda t: t['width'], reverse=True)[0]['url']
    except: poster = None
    infocards = prepare_infocards(metadata)
    endcards = prepare_endcards(metadata)
    # combine cards to weed out duplicates. for videos and playlists prefer
    # infocards, for channels and websites prefer endcards, as those carry more
    # information than the other kind.
    # if the card type is not in ident, we use the whole card for comparison
    # (otherwise they'd all replace each other)
    ident = { # ctype -> ident
        'VIDEO': 'video_id',
        'PLAYLIST': 'playlist_id',
        'CHANNEL': 'channel_id',
        'WEBSITE': 'url',
        'POLL': 'question',
    }
    getident = lambda c: c['content'].get(ident.get(c['type']), c)
    mkexclude = lambda cards, types: [getident(c) for c in cards if c['type'] in types]
    exclude = lambda cards, without: [c for c in cards if getident(c) not in without]
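    # e.g. (hypothetical card) getident({'type':'VIDEO','content':{'video_id':'abc'}})
    # yields 'abc', so a VIDEO endcard duplicating an infocard is dropped below
    # (the infocard wins); a card whose type is not in ident compares as the whole dict.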

    allcards = exclude(infocards, mkexclude(endcards, ['CHANNEL','WEBSITE'])) + \
               exclude(endcards, mkexclude(infocards, ['VIDEO','PLAYLIST']))

    return {
        'published': meta2.get('publishDate'),
        #'uploaded': meta2.get('uploadDate'),
        #'infocards': infocards,
        #'endcards': endcards,
        'all_cards': allcards,
        'poster': poster,
        'regions': regions,
    }