]> git.gir.st - subscriptionfeed.git/blob - app/youtube/lib.py
move parse_metadata to youtube blueprint
[subscriptionfeed.git] / app / youtube / lib.py
1 from urllib.parse import urlparse
2
3 from ..common.common import video_metadata
4 from ..common.innertube import prepare_infocards, prepare_endcards
5
def prepare_metadata(metadata):
    """Build the flat dict describing a video from its player metadata.

    `metadata` is indexed as a nested dict. The keys 'videoDetails' and
    'microformat' -> 'playerMicroformatRenderer' are required;
    'streamingData' and 'captions' are optional and handled best-effort.

    Returns the result of video_metadata(metadata) extended with the keys
    'description', 'rating', 'category', 'aspectr', 'unlisted',
    'whitelisted', 'blacklisted', 'poster', 'infocards', 'endcards',
    'all_cards' and 'subtitles'.

    Raises KeyError/IndexError when a required field (e.g. the description,
    rating, category or thumbnail list) is missing from `metadata`.
    """
    meta1 = metadata['videoDetails']
    meta2 = metadata['microformat']['playerMicroformatRenderer']

    # the actual video streams have exact information:
    try:
        sd = metadata['streamingData']
        some_stream = (sd.get('adaptiveFormats', []) + sd.get('formats', []))[0]
        aspect_ratio = some_stream['width'] / some_stream['height']
    # if that's unavailable (e.g. on livestreams), fall back to
    # thumbnails (only either 4:3 or 16:9).
    # Only data-shape problems trigger the fallback; KeyboardInterrupt and
    # friends are no longer swallowed.
    except (LookupError, TypeError, AttributeError, ZeroDivisionError):
        some_img = meta2['thumbnail']['thumbnails'][0]
        aspect_ratio = some_img['width'] / some_img['height']

    # Note: we could get subtitles in multiple formats directly by querying
    # https://video.google.com/timedtext?hl=en&type=list&v=<VIDEO_ID> followed by
    # https://www.youtube.com/api/timedtext?lang=<LANG_CODE>&v=<VIDEO_ID>&fmt={srv1|srv2|srv3|ttml|vtt},
    # but that won't give us autogenerated subtitles (and is an extra request).
    # we can still add &fmt= to the extracted URLs below (first one takes precedence).
    captions = metadata.get('captions', {})
    try:
        # find the native language captions (assuming there is only 1
        # audioTrack) (any level might not exist):
        default_track = (captions
            .get('playerCaptionsTracklistRenderer', {})
            .get('defaultAudioTrackIndex', 0))
        main_subtitle = (metadata['captions']['playerCaptionsTracklistRenderer']
            ['audioTracks'][default_track]['defaultCaptionTrackIndex'])
    except (LookupError, TypeError, AttributeError):
        main_subtitle = -1  # matches no enumerate() index below

    caption_tracks = (captions
        .get('playerCaptionsTracklistRenderer', {})
        .get('captionTracks', []))
    subtitles = sorted(
        [
            {
                'url': cc['baseUrl'],
                'code': cc['languageCode'],
                'autogenerated': cc.get('kind') == "asr",
                'name': cc['name']['simpleText'],
                'default': i == main_subtitle,
                # for our internal proxy
                'query': "fmt=vtt&" + urlparse(cc['baseUrl']).query,
            }
            for i, cc in enumerate(caption_tracks)
        ],
        # sort order: default lang gets weight 0 (first), other manually
        # translated weight 1, autogenerated weight 2:
        key=lambda cc: (not cc['default']) + cc['autogenerated'],
    )

    infocards = prepare_infocards(metadata)
    endcards = prepare_endcards(metadata)
    # combine cards to weed out duplicates. for videos and playlists prefer
    # infocards, for channels and websites prefer endcards, as those have more
    # information than the other.
    # if the card type is not in ident, we use the whole card for comparison
    # (otherwise they'd all replace each other)
    ident = {  # ctype -> ident
        'VIDEO': 'video_id',
        'PLAYLIST': 'playlist_id',
        'CHANNEL': 'channel_id',
        'WEBSITE': 'url',
        'POLL': 'question',
    }

    def getident(card):
        # identifying value of a card, or the card itself as a fallback
        return card['content'].get(ident.get(card['type']), card)

    def mkexclude(cards, types):
        # identifiers of all cards whose type is listed in `types`
        return [getident(c) for c in cards if c['type'] in types]

    def exclude(cards, without):
        # cards whose identifier does not appear in `without`
        return [c for c in cards if getident(c) not in without]

    allcards = (
        exclude(infocards, mkexclude(endcards, ['CHANNEL', 'WEBSITE']))
        + exclude(endcards, mkexclude(infocards, ['VIDEO', 'PLAYLIST']))
    )

    all_countries = """AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ BA BB BD
        BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BV BW BY BZ CA CC CD CF CG CH
        CI CK CL CM CN CO CR CU CV CW CX CY CZ DE DJ DK DM DO DZ EC EE EG EH ER
        ES ET FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GS GT
        GU GW GY HK HM HN HR HT HU ID IE IL IM IN IO IQ IR IS IT JE JM JO JP KE
        KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD
        ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC NE NF
        NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PN PR PS PT PW PY QA
        RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR SS ST SV SX
        SY SZ TC TD TF TG TH TJ TK TL TM TN TO TR TT TV TW TZ UA UG UM US UY UZ
        VA VC VE VG VI VN VU WF WS YE YT ZA ZM ZW""".split()
    whitelisted = sorted(meta2.get('availableCountries', []))
    # every known country code that is not explicitly listed as available
    blacklisted = sorted(set(all_countries) - set(whitelisted))

    return {
        **video_metadata(metadata),
        'description': meta1['shortDescription'],
        'rating': meta1['averageRating'],
        'category': meta2['category'],
        'aspectr': aspect_ratio,
        'unlisted': meta2['isUnlisted'],
        'whitelisted': whitelisted,
        'blacklisted': blacklisted,
        'poster': meta2['thumbnail']['thumbnails'][0]['url'],
        'infocards': infocards,
        'endcards': endcards,
        'all_cards': allcards,
        'subtitles': subtitles,
    }
94
Imprint / Impressum