]> git.gir.st - subscriptionfeed.git/blob - app/browse/lib.py
display innertube error to user
[subscriptionfeed.git] / app / browse / lib.py
1 import re
2 import requests
3 from datetime import datetime, timezone
4
5 from ..common.common import fetch_xml, parse_xml
6 from ..common.innertube import G
7
def fetch_searchresults(q=None, sp=None):
    """
    fetch the JSON representation of a youtube search results page

    q is sent as the 'search_query' query parameter, sp as the 'sp' query
    parameter. The request is attempted at most twice.

    Returns the decoded JSON response, or None if youtube answered with an
    error status or if no attempt produced a decodable JSON body.
    """
    for _ in range(2):
        today = datetime.now(timezone.utc).strftime("%Y%m%d")
        r = requests.get("https://www.youtube.com/results", {
            'search_query': q,
            'pbj': 1,  # makes youtube return a json-response
            'hl': 'en',  # 'en_US',
            'sp': sp,
        }, headers={
            'x-youtube-client-name': '1',
            # the version is parsed as a date, and if it's invalid (e.g.
            # month>12 or even feb>=30), youtube throws an encrypted
            # stacktrace :D (but any random date >= 20160323 as of 20200802
            # works (even year 3000))
            'x-youtube-client-version': f'2.{today}.01.01',
        })
        if not r.ok:
            # an error status is not retried
            return None

        # Sometimes, youtube throws an exception after the response already begun.
        # This can manifest in two ways:
        # 1) So the status code is 200, begins with JSON and switches to HTML half
        #    way through. WTF?! (This should be "fixed" by retrying, though)
        # 2) The response just stopping mid-way through like this: response.text ==
        #    '[\r\n{"page": "search","rootVe": "4724"},\r\n{"page": "search",'
        # hence, just try-catching the decoding step is the easiest way out.
        try:
            return r.json()
        except ValueError:
            # JSON decoding errors are ValueErrors; anything else (e.g.
            # KeyboardInterrupt) must not be swallowed here.
            continue

    # both attempts returned an undecodable body
    return None
34
def fetch_ajax(params):
    """
    fetch data using a continuation protobuf

    params is sent as the 'continuation' field of a POST request to the
    innertube 'browse' endpoint, using a WEB client context (gl=US, hl=en).

    Returns the decoded JSON response, or None if the request was answered
    with an error status or the body is not valid JSON.
    """
    # TODO: handle auto_generated!
    today = datetime.now(timezone.utc).strftime("%Y%m%d")

    # TODO: this is not cached any more! -> https://github.com/reclosedev/requests-cache/issues/154
    # Note: this 'innertube' API key exists since at least 2015: https://stackoverflow.com/q/33511165
    r = requests.post("https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={
        'continuation': params,
        'context': {'client': {
            'gl': 'US',
            'hl': 'en',
            'clientName': 'WEB',
            'clientVersion': f'2.{today}.01.01',
        }},
    })

    if not r.ok:
        return None

    try:
        return r.json()
    except ValueError:
        # A truncated or non-JSON body is reported like any other failed
        # request (same convention as fetch_searchresults()).
        return None
58
def canonicalize_channel(name):
    """
    resolve a channel identifier to its canonical UC... channel id

    Tried in order:
    1) name already looks like a channel id ("UC" followed by 22 characters
       of [A-Za-z0-9_-]): returned unchanged.
    2) name is a legacy /user/ name: the channel id is taken from the parsed
       xml feed.
    3) name is a /c/ vanity name: the channel id is taken from the 'rssUrl'
       of the channel's about page.

    Returns the channel id, or None if it could not be determined.
    """
    if re.fullmatch(r"(UC[A-Za-z0-9_-]{22})", name):
        return name

    # try /user/ (legacy URLs):
    xmlfeed = fetch_xml("user", name)
    if xmlfeed:
        _, _, _, channel_id, _ = parse_xml(xmlfeed)
        return channel_id

    # get UCID of /c/ (vanity URLs):
    today = datetime.now(timezone.utc).strftime("%Y%m%d")
    r = requests.get(f'https://www.youtube.com/c/{name}/about?pbj=1&hl=en_US', headers={
        'x-youtube-client-name': '1',
        'x-youtube-client-version': f'2.{today}.01.01',  # see fetch_searchresults()
    })
    try:
        # the channel id is whatever follows the first "=" in the rssUrl
        return r.json()[1]['response']['metadata']['channelMetadataRenderer']['rssUrl'].split("=")[1]
    except (ValueError, LookupError, TypeError, AttributeError):
        # body is not JSON, or does not have the expected structure
        # (missing key/index, unexpected type, rssUrl without "=")
        pass

    # unable to extract:
    return None
82
def find_and_parse_error(result):
    """
    look for an error description in a response and format it for display

    Three places are probed, the first truthy one wins:
    responseContext.errors.error[0], alerts[0].alertRenderer, and error.
    Returns None if the last probe yields None; otherwise a string of the
    form "<type>: <message>", with trailing dots stripped from the message.

    NOTE(review): G comes from ..common.innertube and is not visible here;
    it is presumably a getter that yields None for missing keys -- confirm.
    """
    # locate the error object
    found = result|G('responseContext')|G('errors')|G('error')|G(0)
    if not found:
        found = result|G('alerts')|G(0)|G('alertRenderer')
    if not found:
        found = result|G('error')

    if found is None:
        return None

    # label of the error, with a generic fallback
    label = found|G('code', 'type', 'status') or 'Error'

    # human readable description, first truthy candidate wins
    description = found|G('debugInfo', 'externalErrorMessage')
    if not description:
        description = found|G('text')|G.text
    if not description:
        description = found|G('message')
    if not description:
        description = "unknown error"

    return f"{label}: {description.rstrip('.')}"
Imprint / Impressum