git.gir.st - subscriptionfeed.git/blob - app/browse/lib.py
switch browse endpoint from browse_ajax to youtubei/v1/browse
[subscriptionfeed.git] / app / browse / lib.py
1 import re
2 import requests
3 from datetime import datetime, timezone
4
5 from ..common.common import fetch_xml, parse_xml
6
def fetch_searchresults(q=None, sp=None):
    """
    fetch youtube search results as parsed json

    q is sent as the `search_query` parameter and sp as the `sp` parameter
    of the /results request.

    Returns the decoded json response, or None if the request came back
    with an error status or if none of the (at most two) attempts produced
    a decodable body.
    """
    for _ in range(2):
        today = datetime.now(timezone.utc).strftime("%Y%m%d")
        r = requests.get("https://www.youtube.com/results", {
            'search_query': q,
            'pbj': 1, # makes youtube return a json-response
            'hl': 'en', #'en_US',
            'sp': sp,
        }, headers={
            'x-youtube-client-name': '1',
            'x-youtube-client-version': f'2.{today}.01.01', # the version is parsed as a date, and if it's invalid (e.g. month>12 or even feb>=30), youtube throws an encrypted stacktrace :D (but any random date >= 20160323 as of 20200802 works (even year 3000)
        })
        if not r.ok:
            return None

        # Sometimes, youtube throws an exception after the response already begun.
        # This can manifest in two ways:
        # 1) So the status code is 200, begins with JSON and switches to HTML half
        #    way through. WTF?! (This should be "fixed" by retrying, though)
        # 2) The response just stopping mid-way through like this: response.text ==
        #    '[\r\n{"page": "search","rootVe": "4724"},\r\n{"page": "search",'
        # hence, just try-catching the decoding step is the easiest way out.
        try:
            return r.json()
        except ValueError:
            # undecodable bodies surface as a ValueError subclass; catching
            # only that keeps KeyboardInterrupt/SystemExit propagating.
            continue

    # both attempts yielded a broken body
    return None
33
def fetch_ajax(params):
    """
    fetch data using a continuation protobuf

    params is sent as the `continuation` field of the json request body
    posted to the youtubei/v1/browse endpoint.

    Returns the decoded json response, or None if the request came back
    with an error status or its body could not be decoded as json.
    """
    # TODO: handle auto_generated!
    today = datetime.now(timezone.utc).strftime("%Y%m%d")

    # TODO: this is not cached any more! -> https://github.com/reclosedev/requests-cache/issues/154
    r = requests.post("https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json={
        'continuation': params,
        'context': {'client': {
            'gl': 'US',
            'hl': 'en',
            'clientName': 'WEB',
            'clientVersion': f'2.{today}.01.01',
        }},
    })

    if not r.ok:
        return None

    try:
        return r.json()
    except ValueError:
        # a 200 response with a truncated/garbled body is reported the same
        # way as a failed request, mirroring fetch_searchresults().
        return None
56
def canonicalize_channel(name):
    """
    resolve a channel name to its channel id

    A name that already has the shape of a channel id ("UC" followed by 22
    characters out of [A-Za-z0-9_-]) is returned unchanged. Otherwise the
    name is first tried as a legacy /user/ name (via the xml feed) and then
    as a /c/ vanity name (via the channel's about page).

    Returns the channel id, or None if it could not be extracted.
    """
    if re.fullmatch(r"(UC[A-Za-z0-9_-]{22})", name):
        return name

    # try /user/ (legacy URLs):
    xmlfeed = fetch_xml("user", name)
    if xmlfeed:
        _, _, _, channel_id, _ = parse_xml(xmlfeed)
        return channel_id

    # get UCID of /c/ (vanity URLs):
    today = datetime.now(timezone.utc).strftime("%Y%m%d")
    r = requests.get(f'https://www.youtube.com/c/{name}/about?pbj=1&hl=en_US', headers={
        'x-youtube-client-name': '1',
        'x-youtube-client-version': f'2.{today}.01.01', # see fetch_searchresults()
    })
    try:
        # the channel id is the value after the "=" in the feed url
        return r.json()[1]['response']['metadata']['channelMetadataRenderer']['rssUrl'].split("=")[1]
    except (ValueError, LookupError, TypeError, AttributeError):
        # body is not json, or does not have the expected structure
        # (missing key/index, unexpected value type). Anything else
        # (e.g. KeyboardInterrupt) is deliberately not swallowed.
        pass

    # unable to extract:
    return None
Imprint / Impressum