#!/bin/sh
''':'
. /opt/yt/venv/bin/activate
exec python $0 "$@"
':'''
"""Refresh the feed cache for all subscribed channels.

For every channel in the `subscriptions` table, fetch its XML feed and hand
it to update_channel(); on success, record the crawl in the `crawler` table.
Pass -v for progress/diagnostic messages on stderr.
"""
import sys
import time
import sqlite3

sys.path.insert(0, "/opt/yt/app/")
from common.common import *  # provides cf, fetch_xml, update_channel

# Hoisted: print diagnostics only when requested (was re-tested per message).
verbose = '-v' in sys.argv

# NOTE(review): `with` on a sqlite3 connection manages the transaction, not
# the connection lifetime; the process exits right after, so that is fine.
with sqlite3.connect(cf['global']['database']) as conn:
    c = conn.cursor()
    # Never-crawled channels sort first (NULL crawled_at), then oldest first.
    c.execute("""
        SELECT DISTINCT subscriptions.channel_id
        FROM subscriptions LEFT JOIN crawler
          ON subscriptions.channel_id = crawler.channel_id
        -- WHERE crawled_at IS NULL OR crawled_at < datetime('now', '-1 day')
        ORDER BY crawler.crawled_at
    """)
    for (channel_id,) in c.fetchall():
        if verbose:
            sys.stderr.write(f'fetching {channel_id}\n')
        xmlfeed = fetch_xml("channel_id", channel_id)
        if not xmlfeed:
            # FIX: previously a failed fetch fell through to update_channel()
            # with an empty/None feed, and the error path below then crashed
            # on `None + b"\n"`. Skip to the next channel instead.
            if verbose:
                sys.stderr.write(f'FETCH FAILED: {channel_id}\n')
            continue
        try:
            update_channel(conn, xmlfeed)
        except Exception:  # was a bare `except:` — also swallowed ^C/SystemExit
            if verbose:
                sys.stderr.write(f'STORE FAILED: {channel_id}\n')
            # Writing to the database failed, so we store the feed in a file
            # for later analysis (best-effort; this path stays non-fatal).
            with open('/tmp/pull-subscriptions.err', 'ab') as f:
                f.write(b"\n")  # was f"\n".encode('ascii') — same bytes
                f.write(xmlfeed + b"\n")
            continue
        # Record a successful crawl; presumably crawled_at defaults to the
        # current time in the table definition — TODO confirm against schema.
        c.execute("""
            INSERT OR REPLACE INTO crawler (channel_id) VALUES (?)
        """, (channel_id,))
        conn.commit()
        # Rate-limit successful fetches; failed channels move on immediately
        # (the `continue`s above skip this sleep — unchanged behaviour).
        time.sleep(60)