git.gir.st - subscriptionfeed.git/blob - app/common/utils.py
fix websub update being overzealous
[subscriptionfeed.git] / app / common / utils.py
1 #!/bin/sh
2 ''':'
3 . "`dirname "$0"`/../../venv/bin/activate"
4 exec python "$0" "$@"
5 ':'''
6
7 import sys
8 import time
9 import secrets
10 import sqlite3
11 import requests
12
13 from common import *
14
# Maps a subscription's type to the name of the query parameter that
# identifies it in the feed URL (see the "hub.topic" URL in update_feed).
feed_param = dict(
    channel='channel_id',
    playlist='playlist_id',
)
19
def pull_subscriptions(verbose=1, force_all=False, limit=-1):
    """
    Crawls youtube channels' RSS feeds and stores found videos in the database.

    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    force_all: fetch all known channels. otherwise only those not crawled in 24h
    limit: limit number of processed feeds (a negative LIMIT means "no limit"
           in SQLite, hence the default of -1)
    """
    # NOTE: using a sqlite3 connection as a context manager only commits or
    # rolls back the transaction; it does not close the connection. We close
    # it ourselves so callers importing this function don't leak handles.
    conn = sqlite3.connect(cf['global']['database'])
    try:
        with conn:
            c = conn.cursor()
            # feeds never crawled have no crawler row (crawled_at IS NULL);
            # IFNULL(...,0) makes them compare as "older than a day".
            c.execute("""
                SELECT DISTINCT s.channel_id, type
                FROM subscriptions AS s LEFT JOIN crawler AS c
                ON s.channel_id = c.channel_id
                WHERE ? OR IFNULL(crawled_at,0) < datetime('now', '-1 day')
                ORDER BY crawled_at
                LIMIT ?
            """, (force_all, limit))
            results = c.fetchall()

            if verbose >= 2 and not results:
                sys.stderr.write('no feeds to update.\n')

            for i, (feed_id, feed_type) in enumerate(results):
                # wait a minute between feeds (not before the first one);
                # presumably to avoid hammering the remote server.
                if i:
                    time.sleep(60)
                pull_feed(feed_id, feed_type, conn, verbose)
    finally:
        conn.close()
45
def pull_feed(feed_id, feed_type, conn, verbose):
    """
    Fetches one feed, stores it with update_channel() and records the crawl.

    feed_id: id of the channel or playlist to fetch
    feed_type: key into feed_param ('channel' or 'playlist')
    conn: open sqlite3 connection; committed on success
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds

    Returns True if the feed was fetched and stored, False otherwise.
    """
    if verbose >= 2:
        sys.stderr.write(f'fetching {feed_id}\n')

    xmlfeed = fetch_xml(feed_param[feed_type], feed_id)
    if not xmlfeed:
        if verbose:
            sys.stderr.write(f'FETCH FAILED: {feed_id}\n')
        return False

    try:
        update_channel(conn, xmlfeed)
    except Exception:
        # Deliberately broad (any storage/parsing error is handled the same
        # way), but not a bare `except:` so that KeyboardInterrupt and
        # SystemExit still terminate the crawler instead of being reported
        # as a failed store.
        if verbose:
            sys.stderr.write(f'STORE FAILED: {feed_id}\n')
        # writing failed, so we store the feed in a file for later analysis.
        # (xmlfeed is treated as bytes here; the file is opened in binary mode)
        with open('/tmp/pull-subscriptions.err', 'ab') as f:
            f.write(f"<!-- {time.ctime()} ({int(time.time())}) -->\n"
                .encode('ascii'))
            f.write(xmlfeed + b"\n")
        return False

    # update crawled_at timestamp (only channel_id is supplied; presumably the
    # table fills in crawled_at via a column default -- verify against schema):
    c = conn.cursor()
    c.execute("""
        INSERT OR REPLACE INTO crawler (channel_id)
        VALUES (?)
    """, (feed_id,))

    conn.commit()
    return True
78
79
def update_subscriptions(verbose=1, force_all=False, limit=-1):
    """
    Refreshes the websub (pubsubhubbub) subscription requests for youtube feeds.

    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    force_all: refresh all known feeds. otherwise only those whose
               subscription is missing or expires within the next 12 hours
    limit: limit number of processed feeds (a negative LIMIT means "no limit"
           in SQLite, hence the default of -1)
    """
    # NOTE: using a sqlite3 connection as a context manager only commits or
    # rolls back; it does not close the connection. The database is only
    # needed for this one query, so close it before the (slow) update loop.
    conn = sqlite3.connect(cf['global']['database'])
    try:
        with conn:
            c = conn.cursor()
            # feeds without a websub row have subscribed_until IS NULL;
            # IFNULL(...,0) makes them compare as already expired.
            c.execute("""
                SELECT DISTINCT s.channel_id, type
                FROM subscriptions AS s LEFT JOIN websub AS w
                ON s.channel_id = w.channel_id
                WHERE ? OR IFNULL(subscribed_until,0) < datetime('now','+12 hours')
                ORDER BY subscribed_until
                LIMIT ?
            """, (force_all, limit))
            results = c.fetchall()
    finally:
        conn.close()

    if verbose >= 2 and not results:
        sys.stderr.write('no feeds to update.\n')

    for i, (feed_id, feed_type) in enumerate(results):
        # wait a minute between requests (not before the first one);
        # presumably to avoid hammering the hub.
        if i:
            time.sleep(60)
        update_feed(feed_id, feed_type, verbose)
104
def update_feed(feed_id, feed_type, verbose, timeout=30):
    """
    Sends a websub subscribe request for a single feed to the hub.

    feed_id: id of the channel or playlist to subscribe to
    feed_type: key into feed_param ('channel' or 'playlist')
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    timeout: seconds to wait for the hub before giving up on this feed

    Returns True if the hub accepted the request, False on an HTTP error
    status or if the request could not be completed (network error, timeout).
    """
    webhook = cf['websub']['public_uri']
    lease = cf['websub']['lease']
    hmackey = cf['websub']['hmac_key']

    if verbose >= 2:
        sys.stderr.write(f'updating {feed_id}\n')

    feed_type = feed_param[feed_type]
    version, timestamp = "v1", int(time.time())
    nonce = secrets.token_urlsafe(16)
    # the callback URL carries timestamp, nonce and a signature over them,
    # as computed by websub_url_hmac().
    sig = websub_url_hmac(hmackey, feed_id, timestamp, nonce)
    try:
        # without a timeout, requests waits indefinitely on an unresponsive
        # hub, which would stall the whole refresh run.
        r = requests.post("https://pubsubhubbub.appspot.com/subscribe", {
            "hub.callback": f"{webhook}/websub/{version}/{timestamp}/"
                            f"{nonce}/{feed_id}/{sig}",
            "hub.topic": "https://www.youtube.com/xml/feeds/videos.xml"
                         f"?{feed_type}={feed_id}",
            "hub.verify": "async",
            "hub.mode": "subscribe",
            "hub.lease_seconds": lease,
            "hub.secret": hmackey,
        }, timeout=timeout)
    except requests.RequestException as e:
        # a network failure for one feed is reported like an HTTP failure, so
        # the caller can carry on with the remaining feeds.
        if verbose:
            sys.stderr.write(f'FAILED {feed_id}: {e}\n')
        return False

    if not r.ok:
        if verbose:
            sys.stderr.write(f'FAILED {feed_id}: {r.text}\n')
        return False

    return True
133
134
if __name__ == '__main__':
    # command line entry point: the first argument selects the action, the
    # remaining flags may appear in any order.
    if len(sys.argv) < 2 or sys.argv[1] not in ['pull','websub']:
        sys.stderr.write(
            f'Usage: YT_CONFIG=... {sys.argv[0]} pull [-f] [-1] [-v|-vv]\n'
            f' YT_CONFIG=... {sys.argv[0]} websub [-f] [-1] [-v|-vv]\n'
            f'-f: force even if still up-to-date-ish\n'
            f'-v: report errors\n'
            f'-vv: report accessed feeds\n'
            f'-1: limit to one feed (for testing it works)\n')
        sys.exit(1)

    command = sys.argv[1]
    verbosity = 2 if '-vv' in sys.argv else 1 if '-v' in sys.argv else 0
    limit = 1 if '-1' in sys.argv else -1
    force = '-f' in sys.argv

    # dispatch on the validated first argument rather than on membership
    # anywhere in argv, so e.g. `websub pull` cannot run the wrong action.
    if command == 'pull':
        pull_subscriptions(verbosity, force, limit)
    elif command == 'websub':
        update_subscriptions(verbosity, force, limit)
Imprint / Impressum