From 97b889916e395d70e7098a50de50b23679faa4b4 Mon Sep 17 00:00:00 2001
From: girst
Date: Sat, 29 Aug 2020 10:20:34 +0200
Subject: [PATCH] implement 'pipe' framework and test on VIDEO searchresults

---
Review notes (not part of the commit message):
 - G, Select and A.int now catch only the exceptions a failed lookup or
   parse can raise, instead of using a bare `except:`.
 - Select skips list items that are not dicts instead of discarding the
   whole result, and no longer rebinds `other` inside its comprehension.
 - Indentation was reconstructed; the hunks for prepare_searchresults()
   and parse_result_items() are otherwise unchanged.

 app/common/innertube.py | 81 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 67 insertions(+), 14 deletions(-)

diff --git a/app/common/innertube.py b/app/common/innertube.py
index 92328dd..825fa0b 100644
--- a/app/common/innertube.py
+++ b/app/common/innertube.py
@@ -16,6 +16,61 @@ flatten = lambda l: [item for sublist in l for item in sublist] # https://stacko
 first = lambda l: next(iter(l),{})
 listfind = lambda obj,key: first(findall(obj,key))
 
+class G:
+    """
+    null-coalescing version of dict.get() that also works on lists.
+
+    the | operator is overloaded to achieve similar looking code to jq(1) filters:
+    obj|G('a')|G(0) evaluates to obj['a'][0], or to None if any step is missing.
+    """
+    def __init__(self, key):
+        self.key = key
+    def __ror__(self, other):
+        # missing key, index out of range, or an unsubscriptable value (such as
+        # the None left behind by an earlier failed lookup) all coalesce to None.
+        try: return other[self.key]
+        except (LookupError, TypeError): return None
+    class _Text:
+        """ parses youtube's .runs[].text and .simpleText variants """
+        def __ror__(self, other): # Note: only returning runs[0], not concat'ing all!
+            return other|G('simpleText') or other|G('runs')|G(0)|G('text')
+    text = _Text()
+class Select:
+    """
+    |Select('foo') returns the first foo in list, |Select(all='foo') returns all foos.
+
+    list items that are not dicts, or that lack the key, are skipped. if nothing
+    matches (or the input is not a list of dicts), the result is None or, with all=, [].
+    """
+    def __init__(self, key=None, *, all=None):
+        self.key = key or all
+        self.all = all
+    def __ror__(self, other):
+        try:
+            items = [item[self.key] for item in other
+                     if isinstance(item, dict) and self.key in item]
+        except TypeError: # other is not iterable, e.g. None from a failed lookup
+            items = []
+        return items if self.all else items|G(0)
+class A:
+    """
+    apply: value|A(f, *args) evaluates to f(value, *args).
+
+    unlike G and Select, exceptions raised by f are not swallowed.
+    """
+    def __init__(self, f, *args):
+        self.f = f
+        self.args = args
+    def __ror__(self, other):
+        return self.f(other, *self.args)
+    class _Int:
+        """ extracts the digits of a string ("1,234 views" -> 1234); None if that fails """
+        def __ror__(self, other):
+            try: return int(''.join(filter(str.isdigit, other)))
+            except (TypeError, ValueError): return None # not a string / no digits
+    int = _Int()
+
+
 def prepare_searchresults(yt_results):
     contents = listfind(yt_results, 'response') \
         .get('contents',{})\
@@ -23,7 +78,7 @@ def prepare_searchresults(yt_results):
         .get('primaryContents',{})\
         .get('sectionListRenderer',{})\
         .get('contents',[])
-    contents = flatten([c.get('contents',[]) for c in findall(contents, 'itemSectionRenderer')])
+    contents = flatten([c.get('contents',[]) for c in contents|Select(all='itemSectionRenderer')])
 
     return parse_result_items(contents)
 
@@ -124,19 +179,17 @@ def parse_result_items(items):
         key = next(iter(item.keys()), None)
         content = item[key]
         if key == 'videoRenderer':
-            is_live = listfind(content.get('badges',[]), 'metadataBadgeRenderer').get('style') == 'BADGE_STYLE_TYPE_LIVE_NOW'
             results.append({'type': 'VIDEO', 'content': {
                 'video_id': content['videoId'],
-                'title': content['title']['runs'][0]['text'],
-                'author': content['longBylineText']['runs'][0]['text'] or \
-                          content['shortBylineText']['runs'][0]['text'],
-                'channel_id': content['ownerText']['runs'][0] \
-                    ['navigationEndpoint']['browseEndpoint']['browseId'],
-                'length': content.get('lengthText',{}).get('simpleText') \
-                    if not is_live else 'LIVE', # "44:07", "1:41:50"
-                'views': toInt(content.get('viewCountText',{}).get('simpleText') or # "123,456 views", ...
-                    listget(content.get('viewCountText',{}).get('runs',[]),0,{}).get('text')) or 0, # ... "1,234 watching", absent on 0 views
-                'published': age(content.get('publishedTimeText',{}).get('simpleText')),
+                'title': content['title']|G.text,
+                'author': content|G('longBylineText')|G.text or \
+                          content|G('shortBylineText')|G.text,
+                'channel_id': content|G('ownerText')|G('runs')|G(0) \
+                    |G('navigationEndpoint')|G('browseEndpoint')|G('browseId'),
+                'length': content|G('lengthText')|G.text, # "44:07", "1:41:50"
+                'views': content|G('viewCountText')|G.text|A.int or 0, # "1,234 {views|watching}", absent on 0 views
+                'published': content|G('publishedTimeText')|G('simpleText')|A(age),
+                'live': content|G('badges')|Select('metadataBadgeRenderer')|G('style')=='BADGE_STYLE_TYPE_LIVE_NOW',
             }})
         elif key == 'playlistRenderer':
             results.append({'type': 'PLAYLIST', 'content': {
@@ -208,8 +261,8 @@ def parse_result_items(items):
         elif key == 'messageRenderer': # "No more results"
             extras.append({
                 'type': 'message',
-                'message': content.get('title',{}).get('runs',[{}])[0].get('text') or \
-                           content.get('text',{}).get('runs',[{}])[0].get('text'),
+                'message': content|G('title')|G('runs')|G(0)|G('text') or \
+                           content|G('text')|G('runs')|G(0)|G('text'),
             })
         elif key == 'backgroundPromoRenderer': # e.g. "no results"
             extras.append({
-- 
2.39.3