# Source: git.gir.st - subscriptionfeed.git/blob - app/common/utils.py
3 . "`dirname "$0"`/../../venv/bin/activate"
# Maps a subscription's feed type to the name of the query parameter that
# identifies it (used as `?{feed_param[feed_type]}={feed_id}` when building
# the feed URL, and passed to fetch_xml()).
feed_param = {
    'channel': 'channel_id',
    'playlist': 'playlist_id',
}
def pull_subscriptions(verbose=1, force_all=False, limit=-1):
    """
    Crawls youtube channels' RSS feeds and stores found videos in the database.
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    force_all: fetch all known channels. otherwise only those not crawled in 24h
    limit: limit number of processed feeds (a negative value means no limit)
    """
    with sqlite3.connect(cf['global']['database']) as conn:
        c = conn.cursor()
        # NOTE(review): the tail of this query was lost in extraction. Two
        # parameters are bound, so the second must feed a `LIMIT ?`; an
        # ORDER BY clause may also have preceded it -- confirm against
        # upstream.
        c.execute("""
            SELECT DISTINCT s.channel_id, type
            FROM subscriptions AS s LEFT JOIN crawler AS c
              ON s.channel_id = c.channel_id
            WHERE ? OR IFNULL(crawled_at,0) < datetime('now', '-1 day')
            LIMIT ?
        """, (force_all, limit))
        results = c.fetchall()

        if verbose >= 2 and not results:
            sys.stderr.write('no feeds to update.\n')

        for i, (feed_id, feed_type) in enumerate(results):
            # NOTE(review): one statement between the loop header and this
            # call was lost in extraction; it presumably used `i`. Restore it
            # from upstream before relying on this loop.
            pull_feed(feed_id, feed_type, conn, verbose)
def pull_feed(feed_id, feed_type, conn, verbose):
    """
    Fetches a single feed and stores its contents in the database.
    feed_id: id of the channel/playlist to fetch
    feed_type: key into feed_param ('channel' or 'playlist')
    conn: open database connection, handed on to update_channel()
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds

    NOTE(review): the control flow (conditions, except clauses, return
    values) was lost in extraction and has been reconstructed around the
    surviving statements. The True/False return values in particular are an
    assumption -- confirm against upstream.
    """
    if verbose >= 2:
        sys.stderr.write(f'fetching {feed_id}\n')

    xmlfeed = fetch_xml(feed_param[feed_type], feed_id)
    # NOTE(review): assumes fetch_xml() signals failure with a falsy result.
    if not xmlfeed:
        if verbose:
            sys.stderr.write(f'FETCH FAILED: {feed_id}\n')
        return False

    try:
        update_channel(conn, xmlfeed)
    except Exception:
        if verbose:
            sys.stderr.write(f'STORE FAILED: {feed_id}\n')
        # writing failed, so we store the feed in a file for later analysis.
        with open('/tmp/pull-subscriptions.err', 'ab') as f:
            # The file is opened in binary mode, so the marker must be bytes.
            f.write(f"<!-- {time.ctime()} ({int(time.time())}) -->\n"
                    .encode('utf-8'))
            f.write(xmlfeed + b"\n")
        return False

    # update crawled_at timestamp:
    # NOTE(review): only channel_id is inserted; presumably crawled_at is
    # filled in by a column default -- verify against the schema.
    conn.execute("""
        INSERT OR REPLACE INTO crawler (channel_id)
        VALUES (?)
    """, (feed_id,))
    # Commit per feed so the crawl marker survives if a later feed aborts
    # the run.
    conn.commit()
    return True
def update_subscriptions(verbose=1, force_all=False, limit=-1):
    """
    Refreshes the websub (pubsubhubbub) subscription requests for youtube feeds.
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    force_all: refresh all channel subscriptions. otherwise only those whose
               subscribed_until is unset or less than 12 hours away
    limit: limit number of processed feeds (a negative value means no limit)
    """
    with sqlite3.connect(cf['global']['database']) as conn:
        c = conn.cursor()
        # The OR is parenthesised on purpose: AND binds tighter than OR in
        # SQL, so without the parentheses force_all would bypass the
        # `type = 'channel'` filter and playlists would be sent to the hub.
        # NOTE(review): the `LIMIT ?` line was lost in extraction; it is
        # implied by the second bound parameter.
        c.execute("""
            SELECT DISTINCT s.channel_id, type
            FROM subscriptions AS s LEFT JOIN websub AS w
              ON s.channel_id = w.channel_id
            WHERE (? OR IFNULL(subscribed_until,0) < datetime('now','+12 hours'))
              AND type = 'channel' -- playlists don't support websub
            ORDER BY subscribed_until
            LIMIT ?
        """, (force_all, limit))
        results = c.fetchall()

        if verbose >= 2 and not results:
            sys.stderr.write('no feeds to update.\n')

        for i, (feed_id, feed_type) in enumerate(results):
            # NOTE(review): one statement between the loop header and this
            # call was lost in extraction; it presumably used `i`. Restore it
            # from upstream before relying on this loop.
            update_feed(feed_id, feed_type, verbose)
def update_feed(feed_id, feed_type, verbose):
    """
    Sends a websub subscribe request for one feed to the hub at
    pubsubhubbub.appspot.com.
    feed_id: id of the channel/playlist to subscribe to
    feed_type: key into feed_param ('channel' or 'playlist')
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds

    The callback URL embeds a version tag, the current timestamp, a random
    nonce, the feed id and the signature returned by websub_url_hmac().

    NOTE(review): the failure condition and the return values were lost in
    extraction; `not r.ok` and True/False are assumptions -- confirm against
    upstream.
    """
    webhook = cf['webhooks']['public_uri']
    lease = cf['websub']['lease']
    hmackey = cf['websub']['hmac_key']

    if verbose >= 2:
        sys.stderr.write(f'updating {feed_id}\n')

    version, timestamp = "v1", int(time.time())
    nonce = secrets.token_urlsafe(16)
    sig = websub_url_hmac(hmackey, feed_id, timestamp, nonce)

    # Imported here, as in the original, so the module does not need
    # requests_cache unless a subscription is actually refreshed.
    import requests_cache
    # Presumably keeps this POST out of any installed requests cache.
    with requests_cache.disabled():
        r = requests.post("https://pubsubhubbub.appspot.com/subscribe", {
            "hub.callback": f"{webhook}/websub/{version}/{timestamp}/"
                            f"{nonce}/{feed_id}/{sig}",
            "hub.topic": "https://www.youtube.com/xml/feeds/videos.xml"
                         f"?{feed_param[feed_type]}={feed_id}",
            "hub.verify": "async",
            "hub.mode": "subscribe",
            "hub.lease_seconds": lease,
            "hub.secret": hmackey,
        })

    if not r.ok:
        if verbose:
            sys.stderr.write(f'FAILED {feed_id}: {r.text}\n')
        return False

    return True
if __name__ == '__main__':
    # Command line: a mode word ('pull' or 'websub') plus optional flags,
    # all detected by simple membership in sys.argv.
    verbosity = 2 if '-vv' in sys.argv else 1 if '-v' in sys.argv else 0
    limit = 1 if '-1' in sys.argv else -1
    force = '-f' in sys.argv

    if 'pull' in sys.argv:
        pull_subscriptions(verbosity, force, limit)
    elif 'websub' in sys.argv:
        update_subscriptions(verbosity, force, limit)
    else:
        # NOTE(review): the `else:` / `sys.stderr.write(` lines were lost in
        # extraction and are reconstructed here; writing to stderr matches
        # every other message in this file.
        sys.stderr.write(
            f'Usage: YT_CONFIG=... {sys.argv[0]} pull [-f] [-1] [-v|-vv]\n'
            f' YT_CONFIG=... {sys.argv[0]} websub [-f] [-1] [-v|-vv]\n'
            '-f: force even if still up-to-date-ish\n'
            '-v: report errors\n'
            '-vv: report accessed feeds\n'
            '-1: limit to one feed (for testing it works)\n')