]>
git.gir.st - subscriptionfeed.git/blob - app/youtube/lib.py
import re
from urllib.parse import urlparse

import requests

from ..common.common import video_metadata
from ..common.innertube import prepare_infocards, prepare_endcards, G
def prepare_metadata(metadata):
    """Condense a video metadata dict into the fields used for display.

    NOTE(review): this function was rebuilt from a damaged text extraction
    of the file (tokens split across lines, several lines missing). Every
    line marked "reconstructed" below was not present in the extracted text
    and must be confirmed against the upstream file.

    Args:
        metadata: mapping that must contain 'videoDetails'; 'streamingData'
            and 'captions' are optional. Presumably a YouTube player
            response -- confirm against the caller.

    Returns:
        dict: everything ``video_metadata(metadata)`` returns, plus
        'description', 'aspectr', 'unlisted', 'all_cards' and 'subtitles'
        (and the reconstructed 'poster' / 'endcards' entries).

    Raises:
        KeyError: if 'videoDetails' or one of its required members
            ('thumbnail', 'shortDescription', 'isCrawlable') is missing.
        IndexError: if the thumbnail list is empty.
    """
    meta = metadata['videoDetails']

    # the actual video streams have exact information:
    try:
        sd = metadata['streamingData']
        some_stream = (sd.get('adaptiveFormats', []) + sd.get('formats', []))[0]
        aspect_ratio = some_stream['width'] / some_stream['height']
    # if that's unavailable (e.g. on livestreams), fall back to 16:9
    except (KeyError, IndexError, TypeError, AttributeError, ZeroDivisionError):
        # NOTE(review): reconstructed -- only the comment above survived the
        # extraction; the handler and the literal 16/9 are inferred from it.
        aspect_ratio = 16 / 9

    # Note: we could get subtitles in multiple formats directly by querying
    # https://video.google.com/timedtext?hl=en&type=list&v=<VIDEO_ID> followed by
    # https://www.youtube.com/api/timedtext?lang=<LANG_CODE>&v=<VIDEO_ID>&fmt={srv1|srv2|srv3|ttml|vtt},
    # but that won't give us autogenerated subtitles (and is an extra request).
    # we can still add &fmt= to the extracted URLs below (first one takes precedence).
    try:
        # find the native language captions (assuming there is only 1
        # audioTrack) (any level might not exist):
        tracklist = metadata['captions']['playerCaptionsTracklistRenderer']
        default_track = tracklist.get('defaultAudioTrackIndex', 0)
        main_subtitle = tracklist['audioTracks'][default_track]['captionTrackIndices']
    except (KeyError, IndexError, TypeError, AttributeError):
        # NOTE(review): reconstructed -- the fallback value was lost in the
        # extraction. None never equals a track index, so no track is
        # flagged as default in that case.
        main_subtitle = None

    # NOTE(review): 'captionTrackIndices' reads like a list; if it is, the
    # `i == main_subtitle` comparison below is never true and no track is
    # ever marked as default. Left unchanged -- confirm the intended field.

    # G is the project's pipe-style accessor from ..common.innertube; its
    # exact semantics are not visible in this file.
    caption_tracks = (
        metadata
        | G('captions')
        | G('playerCaptionsTracklistRenderer')
        | G('captionTracks')
    ) or []

    subtitles = sorted(
        [
            {
                # NOTE(review): reconstructed -- the first entry of this
                # dict was missing from the extracted text.
                'url': cc['baseUrl'],
                'code': cc['languageCode'],
                'autogenerated': cc.get('kind') == "asr",
                'name': cc['name'] | G.text,
                'default': i == main_subtitle,
                'query': "fmt=vtt&" + urlparse(cc['baseUrl']).query,  # for our internal proxy
            }
            for i, cc in enumerate(caption_tracks)
        ],
        # sort order: default lang gets weight 0 (first), other manually
        # translated weight 1, autogenerated weight 2:
        key=lambda cc: (not cc['default']) + cc['autogenerated'],
    )

    endcards = prepare_endcards(metadata)

    # widest thumbnail serves as the poster image
    thumbs = meta['thumbnail']['thumbnails']
    poster = sorted(thumbs, key=lambda t: t['width'], reverse=True)[0]['url']

    return {
        **video_metadata(metadata),
        'description': meta['shortDescription'],
        'aspectr': aspect_ratio,
        'unlisted': not meta['isCrawlable'],
        # NOTE(review): the next two entries are reconstructed -- two lines
        # were missing here in the extracted text; `poster` is otherwise
        # unused, 'endcards' is a guess. Confirm against upstream.
        'poster': poster,
        'endcards': endcards,
        'all_cards': endcards,
        'subtitles': subtitles,
    }
def channel_exists(feed_id):
    """Probe YouTube's feed endpoint for *feed_id* with a HEAD request.

    NOTE(review): rebuilt from a damaged text extraction of the file. Only
    the signature, the feed_type computation and the start of the
    ``requests.head`` call survived; the query parameters and the return
    statement are reconstructed and must be confirmed against upstream.

    Args:
        feed_id: identifier to look up. A value matching ``UC`` followed by
            22 characters from ``[A-Za-z0-9_-]`` is sent as "channel_id",
            anything else as "user".

    Returns:
        Presumably a truthy value when the feed exists (see note above).
    """
    feed_type = "channel_id" if re.match(r"^UC[A-Za-z0-9_-]{22}$", feed_id) else "user"
    r = requests.head("https://www.youtube.com/feeds/videos.xml", params={
        # NOTE(review): reconstructed -- inferred from feed_type being
        # computed as a parameter name right above.
        feed_type: feed_id,
    })
    # NOTE(review): reconstructed -- the original return expression was
    # lost; `r.ok` (status < 400) is an assumption.
    return r.ok
66 def microformat_parser(metadata
):
67 """ parses additional metadata only available with get_video_info(metaOnly=True) """
68 # WARN: breaks if metadata == None (e.g. invalid video id)
69 meta2
= metadata
.get('microformat',{}).get('playerMicroformatRenderer',{})
70 all_countries
= """AD AE AF AG AI AL AM AO AQ AR AS AT AU AW AX AZ BA BB BD
71 BE BF BG BH BI BJ BL BM BN BO BQ BR BS BT BV BW BY BZ CA CC CD CF CG CH
72 CI CK CL CM CN CO CR CU CV CW CX CY CZ DE DJ DK DM DO DZ EC EE EG EH ER
73 ES ET FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GP GQ GR GS GT
74 GU GW GY HK HM HN HR HT HU ID IE IL IM IN IO IQ IR IS IT JE JM JO JP KE
75 KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD
76 ME MF MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC NE NF
77 NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PN PR PS PT PW PY QA
78 RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR SS ST SV SX
79 SY SZ TC TD TF TG TH TJ TK TL TM TN TO TR TT TV TW TZ UA UG UM US UY UZ
80 VA VC VE VG VI VN VU WF WS YE YT ZA ZM ZW""".split()
81 whitelisted
= sorted(meta2
.get('availableCountries',[]))
82 blacklisted
= sorted(set(all_countries
) - set(whitelisted
))
84 'all' if not blacklisted
else
85 'none' if not whitelisted
else
86 f
"not in {' '.join(blacklisted)}" if len(blacklisted
) < len(whitelisted
) else
87 f
"only in {' '.join(whitelisted)}"
90 poster
= sorted(meta2
['thumbnail']['thumbnails'], key
=lambda t
: t
['width'], reverse
=True)[0]['url']
92 infocards
= prepare_infocards(metadata
)
93 endcards
= prepare_endcards(metadata
)
94 # combine cards to weed out duplicates. for videos and playlists prefer
95 # infocards, for channels and websites prefer endcards, as those have more
96 # information than the other.
97 # if the card type is not in ident, we use the whole card for comparison
98 # (otherwise they'd all replace each other)
99 ident
= { # ctype -> ident
101 'PLAYLIST': 'playlist_id',
102 'CHANNEL': 'channel_id',
106 getident
= lambda c
: c
['content'].get(ident
.get(c
['type']), c
)
107 mkexclude
= lambda cards
, types
: [getident(c
) for c
in cards
if c
['type'] in types
]
108 exclude
= lambda cards
, without
: [c
for c
in cards
if getident(c
) not in without
]
110 allcards
= exclude(infocards
, mkexclude(endcards
, ['CHANNEL','WEBSITE'])) + \
111 exclude(endcards
, mkexclude(infocards
, ['VIDEO','PLAYLIST']))
114 'published': meta2
.get('publishDate'),
115 #'uploaded': meta2.get('uploadDate'),
116 #'infocards': infocards,
117 #'endcards': endcards,
118 'all_cards': allcards
,