From 954d036d8da7a79264d38031bf0565373274151c Mon Sep 17 00:00:00 2001 From: girst Date: Sat, 13 Nov 2021 22:17:37 +0100 Subject: [PATCH] port search to new api; use continuation tokens here too --- app/browse/__init__.py | 14 ++++++++------ app/browse/lib.py | 27 --------------------------- app/browse/protobuf.py | 5 +---- app/browse/templates/search.html.j2 | 5 +---- app/common/innertube.py | 26 ++++++++++++++++++-------- 5 files changed, 28 insertions(+), 49 deletions(-) diff --git a/app/browse/__init__.py b/app/browse/__init__.py index 48326ca..f03a8bf 100644 --- a/app/browse/__init__.py +++ b/app/browse/__init__.py @@ -18,9 +18,9 @@ frontend = Blueprint('browse', __name__, def search(): #token = getattr(current_user, 'token', 'guest') q = request.args.get('q') or request.args.get('search_query') - page = request.args.get('page', 1, type=int) + continuation = request.args.get('continuation') - sp = make_sp(page, **{ + sp = make_sp(**{ k:v for k,v in request.args.items() if k in ['sort','date','type','len'] }, features=[ @@ -31,17 +31,19 @@ def search(): if e in ['verbatim'] ]) - if q: - yt_results = fetch_searchresults(q, sp) + if continuation or q: + yt_results = fetch_ajax("search", **( + {'continuation': continuation} if continuation else {'query': q, 'params': sp} + )) - results, extras = prepare_searchresults(yt_results) + results, extras, continuation = prepare_searchresults(yt_results) for extra in extras: flash(extra, 'info') else: results = None - return render_template('search.html.j2', rows=results, query=q, page=page) + return render_template('search.html.j2', rows=results, query=q, continuation=continuation) @frontend.route('/channel//') @frontend.route('/channel//') diff --git a/app/browse/lib.py b/app/browse/lib.py index f5a4afa..c0c1c89 100644 --- a/app/browse/lib.py +++ b/app/browse/lib.py @@ -5,33 +5,6 @@ from datetime import datetime, timezone from ..common.common import fetch_xml, parse_xml from ..common.innertube import G -def fetch_searchresults(q=None, sp=None): - for _ in range(2): - today = datetime.now(timezone.utc).strftime("%Y%m%d") - r = requests.get(f"https://www.youtube.com/results", { - 'search_query': q, - 'pbj': 1, # makes youtube return a json-response - 'hl': 'en', #'en_US', - 'sp': sp, - }, headers={ - 'x-youtube-client-name': '1', - 'x-youtube-client-version': f'2.{today}.01.01', # the version is parsed as a date, and if it's invalid (e.g. month>12 or even feb>=30), youtube throws an encrypted stacktrace :D (but any random date >= 20160323 as of 20200802 works (even year 3000) - }) - if not r.ok: - return None - - # Sometimes, youtube throws an exception after the response already begun. - # This can manifest in two ways: - # 1) So the status code is 200, begins with JSON and switches to HTML half - # way through. WTF?! (This should be "fixed" by retrying, though) - # 2) The response just stopping mid-way through like this: response.text == - # '[\r\n{"page": "search","rootVe": "4724"},\r\n{"page": "search",' - # hence, just try-catching the decoding step is the easiest way out. - try: - return r.json() - except: - continue # will return None once we break out of the loop - def fetch_ajax(endpoint, **kwargs): """ fetch data using a continuation protobuf diff --git a/app/browse/protobuf.py b/app/browse/protobuf.py index bf5140e..df8c496 100644 --- a/app/browse/protobuf.py +++ b/app/browse/protobuf.py @@ -36,10 +36,8 @@ class SearchRequest: sorted: Optional[int64] = field(1, default=None) filter: Optional[Filters] = field(2, default=None) extras: Optional[Extras] = field(8, default=None) - offset: Optional[int64] = field(9, default=0) -# XXX: search pagination doesn't work -- probably moved to a continuation token -def make_sp(page=1, sort=None, date=None, type=None, len=None, features=[], extras=[]): +def make_sp(sort=None, date=None, type=None, len=None, features=[], extras=[]): sortorder = dict(relevance=0, rating=1, date=2, views=3) datefilter = dict(hour=1, day=2, week=3, month=4, year=5) typefilter = dict(video=1, channel=2, playlist=3, movie=4, show=5) @@ -54,7 +52,6 @@ def make_sp(page=1, sort=None, date=None, type=None, len=None, features=[], extr **{f:True for f in features}, ) if date or type or len or features else None, extras=Extras(**{f:True for f in extras}), - offset=(int(page)-1)*20, ).dumps()) # }}} SEARCH diff --git a/app/browse/templates/search.html.j2 b/app/browse/templates/search.html.j2 index 6b4e4b8..87ceb64 100644 --- a/app/browse/templates/search.html.j2 +++ b/app/browse/templates/search.html.j2 @@ -70,10 +70,7 @@
- {% if page > 1 %} - {{ macros.pagination("previous", {'page':(-1,1)}, -1) }} - {% endif %} - {{ macros.pagination("next", {'page':(+1,1)}, +1) }} + {{ macros.pagination("more", {'continuation': continuation}, +1) if continuation }}
{% else %} please type a search query. diff --git a/app/common/innertube.py b/app/common/innertube.py index c94235c..ccb5ead 100644 --- a/app/common/innertube.py +++ b/app/common/innertube.py @@ -59,15 +59,25 @@ class A: def prepare_searchresults(yt_results): - contents = listfind(yt_results, 'response') \ - .get('contents',{})\ - .get('twoColumnSearchResultsRenderer',{})\ - .get('primaryContents',{})\ - .get('sectionListRenderer',{})\ - .get('contents',[]) - contents = flatten([c.get('contents',[]) for c in contents|Select(all='itemSectionRenderer')]) + contents = ( # from continuation token + yt_results + |G('onResponseReceivedCommands') + |Select('appendContinuationItemsAction') + |G('continuationItems') + ) or ( # from page 1 + yt_results + |G('contents') + |G('twoColumnSearchResultsRenderer') + |G('primaryContents') + |G('sectionListRenderer') + |G('contents') + ) + items = flatten([c.get('contents',[]) for c in contents|Select(all='itemSectionRenderer')]) + items, extra = parse_result_items(items) + more = contents|Select("continuationItemRenderer")|G("continuationEndpoint")|G("continuationCommand")|G("token") + estimatedResults = yt_results|G("estimatedResults") - return parse_result_items(contents) + return items, extra, more def prepare_infocards(metadata): cards = metadata.get('cards',{}).get('cardCollectionRenderer',{}).get('cards',[]) -- 2.39.3