]> git.gir.st - subscriptionfeed.git/blob - app/common.py
initial commit
[subscriptionfeed.git] / app / common.py
1 import os
2 import requests
3 import dateutil.parser
4 from datetime import datetime, timezone
5 from xml.etree import ElementTree
6 from urllib.parse import parse_qs
7 from configparser import ConfigParser
8
# Path of the ini file: taken from $YT_CONFIG when set, otherwise the
# system-wide default location.
config_filename = os.getenv('YT_CONFIG', '/etc/yt/config.ini')

# Module-level parser, filled from that file when this module is imported.
cf = ConfigParser()
cf.read(config_filename)
12
def fetch_xml(feed_type, feed_id, *, timeout=10):
    """
    Download an Atom feed from youtube.com/feeds/videos.xml.

    feed_type is used as the name of the query parameter (e.g. 'channel_id')
    and feed_id as its value; both are URL-encoded by requests.
    timeout is the number of seconds to wait for the server before giving up.

    Returns the response body as text, or None if the server answered with
    an error status. Transport-level failures (connection errors, timeouts)
    are not caught here and propagate as requests exceptions.
    """
    r = requests.get(
        "https://www.youtube.com/feeds/videos.xml",
        # let requests build the query string so odd characters in the id
        # cannot corrupt the URL
        params={feed_type: feed_id},
        # without a timeout a stalled connection would block forever
        timeout=timeout,
    )
    if not r.ok:
        return None

    return r.text
19
def parse_xml(xmldata):
    """
    Parse an Atom feed (as fetched from youtube or pushed via websub).

    Returns a tuple (title, author, channel_id, videos):
      title      - text of the feed-level atom:title, or None if absent
      author     - text of the feed-level atom:author/atom:name, or None
      channel_id - 'channel_id' query parameter of the rel="self" link;
                   if the link is missing or carries no such parameter,
                   the text of the feed-level yt:channelId; None if neither
                   is available
      videos     - one dict per atom:entry with the keys video_id, title,
                   published, channel_id, author and updated (all raw text)

    Raises xml.etree.ElementTree.ParseError on malformed XML and
    AttributeError if an entry lacks one of the elements listed above.
    """
    ns = {
        'atom': "http://www.w3.org/2005/Atom",
        'yt': "http://www.youtube.com/xml/schemas/2015",
        'media': "http://search.yahoo.com/mrss/",
    }

    def text_of(node, path):
        # An Element is falsy when it has no children, so existence must be
        # checked with `is None` rather than by truthiness.
        found = node.find(path, ns)
        return None if found is None else found.text

    feed = ElementTree.fromstring(xmldata)
    title = text_of(feed, 'atom:title')
    author = text_of(feed, 'atom:author/atom:name')

    # The previous truthiness test on the (childless) yt:channelId element
    # never succeeded, so the id has always been taken from the self link.
    # Keep that as the primary source so returned ids do not change, and use
    # yt:channelId only when the link cannot provide one (previously a crash).
    # TODO: this should be adapted for playlists.
    channel_id = None
    self_link = feed.find('atom:link[@rel="self"]', ns)
    if self_link is not None:
        query = (self_link.get('href') or '').partition('?')[2]
        channel_id = parse_qs(query).get('channel_id', [None])[0]
    if channel_id is None:
        channel_id = text_of(feed, 'yt:channelId')

    # Entry fields are required: a missing element raises AttributeError
    # instead of silently producing a half-empty record.
    videos = [
        {
            'video_id': entry.find('yt:videoId', ns).text,
            'title': entry.find('atom:title', ns).text,
            'published': entry.find('atom:published', ns).text,
            'channel_id': entry.find('yt:channelId', ns).text,
            'author': entry.find('atom:author', ns).find('atom:name', ns).text,
            # extra fields for pull_subs/webhook:
            'updated': entry.find('atom:updated', ns).text,
            #'description': entry.find('media:group',ns).find('media:description',ns).text ##xxx:missing for websub
        }
        for entry in feed.findall('atom:entry', ns)
    ]

    return title, author, channel_id, videos
49
def update_channel(db, xmldata):
    """
    Store the videos of one feed document and refresh the channel name.

    db is a database connection (its cursor() is used; nothing is committed
    here), xmldata the raw feed XML as accepted by parse_xml.

    returns True on success, False on failure. rigorous error checking is required, otherwise data will be lost!
    the caller MUST (as per RFC 2119) write (append) the xmlfeed into a file on error.

    Currently only empty input yields False; exceptions raised while parsing
    the feed, parsing timestamps or talking to the database are not caught
    and propagate to the caller.
    """
    if not xmldata:
        return False

    # Note: websub does not return global author
    _title, _author, channel_id, videos = parse_xml(xmldata)  # xxx: perl-code had this eval'd for a die

    c = db.cursor()
    # one crawl time for the whole feed document
    now = datetime.now(timezone.utc)
    for video in videos:
        updated = dateutil.parser.parse(video['updated'])
        published = dateutil.parser.parse(video['published'])
        # if update and published time are near-identical, it's new. use crawl time if it was published within a week.
        # else, it's just an update to an older video (before we subscribed, so use original upload time).
        # total_seconds() is needed here: .seconds drops the days component,
        # so an update days later could look like a few seconds' difference.
        freshly_published = abs((updated - published).total_seconds()) < 60
        if freshly_published and (now - published).days < 7:
            timestamp = now
        else:
            timestamp = published

        c.execute("""
            INSERT OR IGNORE INTO videos (id, channel_id, title, published, crawled)
            VALUES (?, ?, ?, datetime(?), datetime(?))
        """, (video['video_id'], video['channel_id'], video['title'], video['published'], timestamp))  # XXX:errorcheck

    # update channel name (we don't fetch it on subscribing).
    # Doing this once per feed is enough; the last entry's author is used,
    # which is the value the former per-video upsert ended up storing.
    if videos:
        c.execute("""
            INSERT OR REPLACE INTO channels (id, name)
            VALUES (?, ?)
        """, (channel_id, videos[-1]['author']))  # XXX:errorcheck

    return True
Imprint / Impressum