]>
git.gir.st - subscriptionfeed.git/blob - app/common.py
4 from datetime
import datetime
, timezone
5 from xml
.etree
import ElementTree
6 from urllib
.parse
import parse_qs
7 from configparser
import ConfigParser
10 config_filename
= os
.environ
.get('YT_CONFIG', '/etc/yt/config.ini')
11 cf
.read(config_filename
)
13 def fetch_xml(feed_type
, feed_id
):
14 r
= requests
.get(f
"https://www.youtube.com/feeds/videos.xml?{feed_type}={feed_id}")
def parse_xml(xmldata):
    """Parse a YouTube Atom feed into title, author, channel id and videos.

    Args:
        xmldata: the feed document, as accepted by ElementTree.fromstring().

    Returns:
        A tuple ``(title, author, channel_id, videos)``:
        title is the text of the feed-level atom:title; author is the text of
        the feed-level atom:author/atom:name, or None when the feed has none;
        channel_id is taken from the ``channel_id`` query parameter of the
        feed's rel="self" link, falling back to the feed-level yt:channelId;
        videos is a list with one dict per atom:entry holding the keys
        'video_id', 'title', 'published', 'channel_id', 'author' and
        'updated' (all element text, unparsed).

    Raises:
        xml.etree.ElementTree.ParseError: xmldata is not well-formed XML.
        AttributeError: the feed title or a required entry element is missing.
        ValueError: no channel id could be determined from the feed.
    """
    ns = {
        'atom': "http://www.w3.org/2005/Atom",
        'yt': "http://www.youtube.com/xml/schemas/2015",
        'media': "http://search.yahoo.com/mrss/",
    }

    feed = ElementTree.fromstring(xmldata)

    # Compare find() results against None explicitly: an Element without
    # child elements is falsy, so truth-testing it does not tell whether the
    # element exists.
    author_name = feed.find('atom:author/atom:name', ns)
    author = author_name.text if author_name is not None else None

    # The self link is consulted first: the former truth test on a text-only
    # yt:channelId element could never succeed, so the self link is what
    # callers have effectively always received.
    # TODO: clean this up (websub has no yt:channelId, this should be adapted for playlists)
    channel_id = None
    self_link = feed.find('atom:link[@rel="self"]', ns)
    if self_link is not None:
        query = self_link.get('href', '').partition('?')[2]
        channel_id = parse_qs(query).get('channel_id', [None])[0]
    if channel_id is None:
        # NOTE(review): this fallback was unreachable before; confirm the
        # feed-level yt:channelId has the same form as the self-link value.
        feed_channel = feed.find('yt:channelId', ns)
        if feed_channel is not None:
            channel_id = feed_channel.text
    if channel_id is None:
        raise ValueError("feed has neither a self link with channel_id nor yt:channelId")

    title = feed.find('atom:title', ns).text

    videos = [
        {
            'video_id': entry.find('yt:videoId', ns).text,
            'title': entry.find('atom:title', ns).text,
            'published': entry.find('atom:published', ns).text,
            'channel_id': entry.find('yt:channelId', ns).text,
            'author': entry.find('atom:author', ns).find('atom:name', ns).text,
            # extra fields for pull_subs/webhook:
            'updated': entry.find('atom:updated', ns).text,
            # 'description' (media:group/media:description) is not extracted:
            # it is missing for websub.
        }
        for entry in feed.findall('atom:entry', ns)
    ]

    return title, author, channel_id, videos
50 def update_channel(db
, xmldata
):
52 returns True on success, False on failure. rigorous error checking is required, otherwise data will be lost!
53 the caller MUST (as per RFC 2119) write (append) the xmlfeed into a file on error.
55 if not xmldata
: return False
57 # Note: websub does not return global author
58 title
, author
, channel_id
, videos
= parse_xml(xmldata
) #xxx: perl-code had this eval'd for a die
62 now
= datetime
.now(timezone
.utc
)
63 updated
= dateutil
.parser
.parse(video
['updated'])
64 published
= dateutil
.parser
.parse(video
['updated'])
65 # if update and published time are near-identical, it's new. use crawl time if it was published within a week.
66 # else, it's just an update to an older video (before we subscribed, so use original upload time).
67 if (updated
- published
).seconds
< 60 and (now
- published
).days
< 7:
73 INSERT OR IGNORE INTO videos (id, channel_id, title, published, crawled)
74 VALUES (?, ?, ?, datetime(?), datetime(?))
75 """, (video
['video_id'], video
['channel_id'], video
['title'], video
['published'], timestamp
)) #XXX:errorcheck
77 # update channel name (we don't fetch it on subscribing)
78 author
= video
['author'] # XXX: doing this once per channel is enough (for pull-subs.pl)
80 INSERT OR REPLACE INTO channels (id, name)
82 """, (channel_id
, author
)) #XXX:errorcheck