From f190f46d482268c47337fdc7e4174f0197a9c41d Mon Sep 17 00:00:00 2001 From: girst Date: Tue, 4 Aug 2020 18:06:27 +0200 Subject: [PATCH] split metadata in essential and extended extended is only used by /watch, but essential also for store_video_metadata(). this allows us to move parse_metadata() to the youtube blueprint. --- app/common/common.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/app/common/common.py b/app/common/common.py index fbb6f2c..4508124 100644 --- a/app/common/common.py +++ b/app/common/common.py @@ -140,7 +140,7 @@ def update_channel(db, xmldata, from_webhook=False): if from_webhook: current_app.logger.warning(f" is webhook and new") # XXX: remove _, _, meta, _, _ = get_video_info(video['video_id']) if meta: - meta = prepare_metadata(meta) + meta = video_metadata(meta) published = dateutil.parser.parse(meta['published']) if from_webhook: current_app.logger.warning(f" uploaded {published}") # XXX: remove if (now - published).days < 7: @@ -261,6 +261,25 @@ def unscramble(cipher, algo): # test video id: UxxajLWwzqY sig = cipher.get('sig', [''.join(signature)])[0] return f"{cipher['url'][0]}&{sp}={sig}" +def video_metadata(metadata): + if not metadata: + return {} + + meta1 = metadata['videoDetails'] + meta2 = metadata['microformat']['playerMicroformatRenderer'] + + published_at = meta2.get('liveBroadcastDetails',{}) \ + .get('startTimestamp', f"{meta2['publishDate']}T00:00:00Z") + + return { + 'title': meta1['title'], + 'author': meta1['author'], + 'channel_id': meta1['channelId'], + 'published': published_at, + 'views': int(meta1['viewCount']), + 'length': int(meta1['lengthSeconds']), + } + def prepare_metadata(metadata): meta1 = metadata['videoDetails'] meta2 = metadata['microformat']['playerMicroformatRenderer'] @@ -338,19 +357,9 @@ def prepare_metadata(metadata): whitelisted = sorted(meta2.get('availableCountries',[])) blacklisted = sorted(set(all_countries) - set(whitelisted)) - 
published_at = f"{meta2['publishDate']}T00:00:00Z" # yyyy-mm-dd - # 'premiere' videos (and livestreams?) have a ISO8601 date available: - if 'liveBroadcastDetails' in meta2 and 'startTimestamp' in meta2['liveBroadcastDetails']: # TODO: tighten up - published_at = meta2['liveBroadcastDetails']['startTimestamp'] - return { - 'title': meta1['title'], - 'author': meta1['author'], - 'channel_id': meta1['channelId'], + **video_metadata(metadata), 'description': meta1['shortDescription'], - 'published': published_at, - 'views': meta1['viewCount'], - 'length': int(meta1['lengthSeconds']), 'rating': meta1['averageRating'], 'category': meta2['category'], 'aspectr': aspect_ratio, @@ -373,7 +382,7 @@ def store_video_metadata(video_id): if new_video: _, _, meta, _, _ = get_video_info(video_id) if meta: - meta = prepare_metadata(meta) + meta = video_metadata(meta) c.execute(""" INSERT OR IGNORE INTO videos (id, channel_id, title, published, crawled) VALUES (?, ?, ?, datetime(?), datetime(?)) -- 2.39.3