]> git.gir.st - subscriptionfeed.git/blob - app/common/utils.py
move subscription cronjobs into common/utils
[subscriptionfeed.git] / app / common / utils.py
1 #!/bin/sh
2 ''':'
3 . "`dirname "$0"`/../../venv/bin/activate"
4 exec python "$0" "$@"
5 ':'''
6
7 import sys
8 import time
9 import sqlite3
10 import requests
11
12 from common import *
13
# Maps the subscription `type` stored in the database to the name of the
# matching feed parameter; update_feed() uses it as the query-string key of
# the videos.xml topic URL and pull_feed() hands it to fetch_xml().
feed_param = {
    'channel': 'channel_id',
    'playlist': 'playlist_id',
}
18
def pull_subscriptions(verbose=1, force_all=False, limit=-1):
    """
    Crawls youtube channels' RSS feeds and stores found videos in the database.
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    force_all: fetch all known channels. otherwise only those not crawled in 24h
    limit: limit number of processed feeds (a negative value means no limit)
    """
    from contextlib import closing

    # sqlite3's own context manager only commits / rolls back the transaction;
    # it does not close the connection. closing() releases the handle, while
    # the inner `conn` keeps the commit-on-success / rollback-on-error exit.
    with closing(sqlite3.connect(cf['global']['database'])) as conn, conn:
        c = conn.cursor()
        # Least recently crawled feeds come first; the first placeholder
        # short-circuits the age check when force_all is set.
        c.execute("""
            SELECT DISTINCT s.channel_id, type
            FROM subscriptions AS s LEFT JOIN crawler AS c
            ON s.channel_id = c.channel_id
            WHERE ? OR IFNULL(crawled_at,0) < datetime('now', '-1 day')
            ORDER BY crawled_at
            LIMIT ?
        """, (force_all, limit))
        results = c.fetchall()

        if verbose >= 2 and not results:
            sys.stderr.write('no feeds to update.\n')

        for i, (feed_id, feed_type) in enumerate(results):
            if i:
                # pause between two feeds, but not before the first one
                time.sleep(60)
            pull_feed(feed_id, feed_type, conn, verbose)
44
def pull_feed(feed_id, feed_type, conn, verbose):
    """
    Fetches a single feed and stores its contents using the given connection.
    feed_id: id of the channel/playlist to fetch
    feed_type: key into feed_param ('channel' or 'playlist')
    conn: open sqlite3 connection; committed here on success
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    returns True if the feed was fetched and stored, False otherwise.
    """
    if verbose >= 2:
        sys.stderr.write(f'fetching {feed_id}\n')

    xmlfeed = fetch_xml(feed_param[feed_type], feed_id)
    if not xmlfeed:
        if verbose:
            sys.stderr.write(f'FETCH FAILED: {feed_id}\n')
        return False

    try:
        update_channel(conn, xmlfeed)
    except Exception:
        # Only real errors are treated as a failed store; KeyboardInterrupt
        # and SystemExit must propagate so the crawl can actually be aborted.
        if verbose:
            sys.stderr.write(f'STORE FAILED: {feed_id}\n')
        # writing failed, so we store the feed in a file for later analysis.
        # (xmlfeed is appended as raw bytes, prefixed by a timestamp comment)
        with open('/tmp/pull-subscriptions.err', 'ab') as f:
            f.write(f"<!-- {time.ctime()} ({int(time.time())}) -->\n"
                    .encode('ascii'))
            f.write(xmlfeed + b"\n")
        return False

    # update crawled_at timestamp:
    c = conn.cursor()
    c.execute("""
        INSERT OR REPLACE INTO crawler (channel_id)
        VALUES (?)
    """, (feed_id,))

    conn.commit()
    return True
77
78
def update_subscriptions(verbose=1, limit=-1):
    """
    Refreshes the websub (pubsubhubhub) subscription requests for youtube feeds.
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    limit: limit number of processed feeds (a negative value means no limit)
    """
    from contextlib import closing

    # sqlite3's context manager does not close the connection, so wrap it in
    # closing(). The database is only read here, and the handle is released
    # before the slow sleep/HTTP loop below starts.
    with closing(sqlite3.connect(cf['global']['database'])) as conn:
        c = conn.cursor()
        # Feeds whose subscription runs out within 12 hours (or that were
        # never subscribed), soonest expiry first. `limit` was previously
        # accepted but never applied to the query.
        c.execute("""
            SELECT DISTINCT s.channel_id, type
            FROM subscriptions AS s LEFT JOIN websub AS w
            ON s.channel_id = w.channel_id
            WHERE IFNULL(subscribed_until,0) < datetime('now', '+12 hours')
            ORDER BY subscribed_until
            LIMIT ?
        """, (limit,))
        results = c.fetchall()

    if verbose >= 2 and not results:
        sys.stderr.write('no feeds to update.\n')

    for i, (feed_id, feed_type) in enumerate(results):
        if i:
            # pause between two feeds, but not before the first one
            time.sleep(60)
        update_feed(feed_id, feed_type, verbose)
102
def update_feed(feed_id, feed_type, verbose):
    """
    Sends a websub subscribe request for a single feed to the hub.
    feed_id: id of the channel/playlist to subscribe to
    feed_type: key into feed_param ('channel' or 'playlist')
    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    returns True if the hub accepted the request, False on an HTTP error
    status or when the request itself failed (network error, timeout).
    """
    webhook = cf['websub']['public_uri']
    lease = cf['websub']['lease']
    hmackey = cf['websub']['hmac_key']

    if verbose >= 2:
        sys.stderr.write(f'updating {feed_id}\n')

    param_name = feed_param[feed_type]
    version, timestamp, nonce, sig = "v1", int(time.time()), 0, "x" # TODO:sig,nonce
    callback = f"{webhook}/websub/{version}/{timestamp}/{nonce}/{feed_id}/{sig}"
    topic = f"https://www.youtube.com/xml/feeds/videos.xml?{param_name}={feed_id}"

    try:
        # Without a timeout a stalled hub would block the cronjob forever.
        r = requests.post("https://pubsubhubbub.appspot.com/subscribe", {
            "hub.callback": callback,
            "hub.topic": topic,
            "hub.verify": "async",
            "hub.mode": "subscribe",
            "hub.lease_seconds": lease,
            "hub.secret": hmackey,
        }, timeout=30)
    except requests.RequestException as e:
        # Treat transport errors like a rejected request, so one unreachable
        # hub call does not abort the remaining feeds.
        if verbose:
            sys.stderr.write(f'FAILED {feed_id}: {e}\n')
        return False

    if not r.ok:
        if verbose:
            sys.stderr.write(f'FAILED {feed_id}: {r.text}\n')
        return False

    return True
129
130
if __name__ == '__main__':
    # Command line entry point: the first argument selects the job, the
    # remaining flags may appear in any order.
    if len(sys.argv) < 2 or sys.argv[1] not in ['pull','websub']:
        sys.stderr.write(f'Usage: YT_CONFIG=... {sys.argv[0]} pull [-f] [-1] [-v|-vv]\n')
        sys.stderr.write(f' YT_CONFIG=... {sys.argv[0]} websub [-1] [-v|-vv]\n')
        sys.stderr.write('-f: force even if still up-to-date-ish\n')
        sys.stderr.write('-v: report errors\n')
        sys.stderr.write('-vv: report accessed feeds\n')
        sys.stderr.write('-1: limit to one feed (for testing it works)\n')
        sys.exit(1)

    mode = sys.argv[1]
    verbosity = 2 if '-vv' in sys.argv else 1 if '-v' in sys.argv else 0
    limit = 1 if '-1' in sys.argv else -1
    force = '-f' in sys.argv

    # Dispatch on the validated first argument rather than on membership
    # anywhere in argv, so a stray later 'pull' cannot override 'websub'.
    if mode == 'pull':
        pull_subscriptions(verbosity, force, limit)
    elif mode == 'websub':
        update_subscriptions(verbosity, limit)
Imprint / Impressum