git.gir.st - subscriptionfeed.git/blob - app/common/utils.py
remove now unused cipher fetching code
[subscriptionfeed.git] / app / common / utils.py
1 #!/bin/sh
2 ''':'
3 . "`dirname "$0"`/../../venv/bin/activate"
4 exec python "$0" "$@"
5 ':'''
6
7 import os
8 import sys
9 import time
10 import secrets
11 import sqlite3
12 import requests
13 import subprocess
14 import html.parser
15
16 from common import *
17
# Maps a subscription's type to the query-parameter name used when building
# the feed request (passed to fetch_xml() and into the websub topic URL).
feed_param = dict(
    channel='channel_id',
    playlist='playlist_id',
)
22
def pull_subscriptions(verbose=1, force_all=False, limit=-1):
    """
    Crawls youtube channels' RSS feeds and stores found videos in the database.

    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    force_all: fetch all known channels. otherwise only those not crawled in 24h
    limit: limit number of processed feeds (passed to SQL LIMIT; a negative
           value, the default, means no limit in SQLite)
    """
    conn = sqlite3.connect(cf['global']['database'])
    try:
        # `with conn` only scopes the transaction (commit on success, rollback
        # on an exception); it does NOT close the connection. The surrounding
        # try/finally makes sure the handle is released in every case.
        with conn:
            c = conn.cursor()
            # least recently crawled feeds first; never-crawled ones (NULL
            # crawled_at) are treated as 0 by the IFNULL in the filter.
            c.execute("""
                SELECT DISTINCT s.channel_id, type
                FROM subscriptions AS s LEFT JOIN crawler AS c
                ON s.channel_id = c.channel_id
                WHERE ? OR IFNULL(crawled_at,0) < datetime('now', '-1 day')
                ORDER BY crawled_at
                LIMIT ?
            """, (force_all, limit))
            results = c.fetchall()

            if verbose >= 2 and not results:
                sys.stderr.write('no feeds to update.\n')

            for i, (feed_id, feed_type) in enumerate(results):
                # wait a minute between two feeds (but not before the first)
                if i:
                    time.sleep(60)
                pull_feed(feed_id, feed_type, conn, verbose)
    finally:
        conn.close()
48
def pull_feed(feed_id, feed_type, conn, verbose):
    """
    Fetches one feed, stores its contents and records that it was crawled.

    feed_id: id of the channel or playlist to fetch
    feed_type: key into feed_param (selects the request parameter name)
    conn: open sqlite3 connection; committed here on success
    verbose: 0: completely silent; 1: warn on errors; 2: also log the access
    returns: True if the feed was fetched and stored, False otherwise
    """
    if verbose >= 2:
        sys.stderr.write(f'fetching {feed_id}\n')

    xmlfeed = fetch_xml(feed_param[feed_type], feed_id)
    if not xmlfeed:
        if verbose:
            sys.stderr.write(f'FETCH FAILED: {feed_id}\n')
        return False

    try:
        update_channel(conn, xmlfeed)
    except Exception:
        # Deliberately broad (any storage/parsing problem is handled the same
        # way), but not a bare `except:`: KeyboardInterrupt and SystemExit must
        # still be able to stop the crawler instead of being logged as a
        # failed store.
        if verbose:
            sys.stderr.write(f'STORE FAILED: {feed_id}\n')
        # writing failed, so we store the feed in a file for later analysis.
        with open('/tmp/pull-subscriptions.err', 'ab') as f:
            f.write(f"<!-- {time.ctime()} ({int(time.time())}) -->\n"
                    .encode('ascii'))
            # NOTE(review): assumes fetch_xml() returns bytes - confirm.
            f.write(xmlfeed + b"\n")
        return False

    # update crawled_at timestamp:
    # (only channel_id is written; presumably crawled_at is filled in by a
    # column default - the schema is not visible from here.)
    c = conn.cursor()
    c.execute("""
        INSERT OR REPLACE INTO crawler (channel_id)
        VALUES (?)
    """, (feed_id,))

    conn.commit()
    return True
81
82
def update_subscriptions(verbose=1, force_all=False, limit=-1):
    """
    Refreshes the websub (pubsubhubbub) subscription requests for youtube feeds.

    verbose: 0: completely silent; 1: warn on errors; 2: log all accessed feeds
    force_all: refresh all subscribed channels. otherwise only those whose
               subscription expires within the next 12 hours (or is unknown)
    limit: limit number of processed feeds (passed to SQL LIMIT; a negative
           value, the default, means no limit in SQLite)
    """
    conn = sqlite3.connect(cf['global']['database'])
    try:
        # AND binds tighter than OR in SQL, so the force/expiry alternatives
        # must be parenthesised. Without the parentheses, force_all bypassed
        # the type filter and playlists were sent to the hub as well.
        results = conn.execute("""
            SELECT DISTINCT s.channel_id, type
            FROM subscriptions AS s LEFT JOIN websub AS w
            ON s.channel_id = w.channel_id
            WHERE (? OR IFNULL(subscribed_until,0) < datetime('now','+12 hours'))
            AND type = 'channel' -- playlists don't support websub
            ORDER BY subscribed_until
            LIMIT ?
        """, (force_all, limit)).fetchall()
    finally:
        # The query is read-only and update_feed() does not use the database,
        # so release the handle before the (minutes-long) request loop.
        conn.close()

    if verbose >= 2 and not results:
        sys.stderr.write('no feeds to update.\n')

    for i, (feed_id, feed_type) in enumerate(results):
        # wait a minute between two requests (but not before the first)
        if i:
            time.sleep(60)
        update_feed(feed_id, feed_type, verbose)
108
def update_feed(feed_id, feed_type, verbose):
    """
    Sends a websub subscribe request for a single feed to the hub.

    feed_id: id of the feed to subscribe to
    feed_type: key into feed_param (selects the topic URL's parameter name)
    verbose: 0: completely silent; 1: warn on errors; 2: also log the access
    returns: True if the hub's response was ok, False otherwise
    """
    callback_base = cf['webhooks']['public_uri']
    lease_seconds = cf['websub']['lease']
    secret = cf['websub']['hmac_key']

    if verbose >= 2:
        sys.stderr.write(f'updating {feed_id}\n')

    # the callback URL carries a version tag, the request time, a random
    # nonce and a signature computed over them by websub_url_hmac().
    version = "v1"
    timestamp = int(time.time())
    nonce = secrets.token_urlsafe(16)
    signature = websub_url_hmac(secret, feed_id, timestamp, nonce)

    import requests_cache
    # issue the request with requests_cache switched off
    with requests_cache.disabled():
        callback = (
            f"{callback_base}/websub/{version}/{timestamp}/"
            f"{nonce}/{feed_id}/{signature}"
        )
        topic = (
            "https://www.youtube.com/xml/feeds/videos.xml"
            f"?{feed_param[feed_type]}={feed_id}"
        )
        form = {
            "hub.callback": callback,
            "hub.topic": topic,
            "hub.verify": "async",
            "hub.mode": "subscribe",
            "hub.lease_seconds": lease_seconds,
            "hub.secret": secret,
        }
        r = requests.post("https://pubsubhubbub.appspot.com/subscribe", form)

    if r.ok:
        return True

    if verbose:
        sys.stderr.write(f'FAILED {feed_id}: {r.text}\n')
    return False
138
139
if __name__ == '__main__':
    # Minimal command line handling: flags and the sub-command are simply
    # looked up anywhere in the argument list.
    argv = sys.argv

    if '-vv' in argv:
        verbosity = 2
    elif '-v' in argv:
        verbosity = 1
    else:
        verbosity = 0

    force = '-f' in argv
    limit = -1
    if '-1' in argv:
        limit = 1

    if 'pull' in argv:
        # crawl the feeds and store new videos
        pull_subscriptions(verbosity, force, limit)
    elif 'websub' in argv:
        # renew the websub subscriptions at the hub
        update_subscriptions(verbosity, force, limit)
    else:
        # no known sub-command: print usage and signal failure
        prog = argv[0]
        usage = (
            f'Usage: YT_CONFIG=... {prog} pull [-f] [-1] [-v|-vv]\n'
            f' YT_CONFIG=... {prog} websub [-f] [-1] [-v|-vv]\n'
            '-f: force even if still up-to-date-ish\n'
            '-v: report errors\n'
            '-vv: report accessed feeds\n'
            '-1: limit to one feed (for testing it works)\n'
        )
        sys.stderr.write(usage)
        sys.exit(1)
Imprint / Impressum