From 09dc209292bd482d1453f0b50a9c0702ed551af7 Mon Sep 17 00:00:00 2001 From: Anders Cornell Date: Sun, 2 Oct 2022 00:03:33 -0400 Subject: [PATCH] add the thing --- live-autojoin/README.md | 49 + live-autojoin/announce.py | 58 + live-autojoin/background.py | 180 +++ live-autojoin/common.py | 9 + live-autojoin/deploy | 5 + live-autojoin/service.py | 154 +++ .../systemd/live-autojoin-announce.service | 8 + .../systemd/live-autojoin-cron.service | 7 + .../systemd/live-autojoin-cron.timer | 3 + .../systemd/live-autojoin-service.service | 8 + .../systemd/live-autojoin-web.service | 7 + .../systemd/live-autojoin-web.socket | 4 + live-autojoin/systemd/live-autojoin.target | 5 + live-autojoin/templates/error.html | 10 + live-autojoin/templates/status.html | 13 + live-autojoin/web.py | 105 ++ sidebot/deploy | 6 + sidebot/setup.cfg | 17 + sidebot/sidebot/__init__.py | 0 sidebot/sidebot/main.py | 141 +++ sidebot/sidebot/notd_list | 1000 +++++++++++++++++ sidebot/sidebot/number.py | 35 + sidebot/sidebot/sidebar.py | 138 +++ sidebot/sidebot/strikebot_updates.py | 218 ++++ 24 files changed, 2180 insertions(+) create mode 100644 live-autojoin/README.md create mode 100644 live-autojoin/announce.py create mode 100644 live-autojoin/background.py create mode 100644 live-autojoin/common.py create mode 100755 live-autojoin/deploy create mode 100644 live-autojoin/service.py create mode 100644 live-autojoin/systemd/live-autojoin-announce.service create mode 100644 live-autojoin/systemd/live-autojoin-cron.service create mode 100644 live-autojoin/systemd/live-autojoin-cron.timer create mode 100644 live-autojoin/systemd/live-autojoin-service.service create mode 100644 live-autojoin/systemd/live-autojoin-web.service create mode 100644 live-autojoin/systemd/live-autojoin-web.socket create mode 100644 live-autojoin/systemd/live-autojoin.target create mode 100644 live-autojoin/templates/error.html create mode 100644 live-autojoin/templates/status.html create mode 100644 live-autojoin/web.py 
create mode 100755 sidebot/deploy create mode 100644 sidebot/setup.cfg create mode 100644 sidebot/sidebot/__init__.py create mode 100644 sidebot/sidebot/main.py create mode 100644 sidebot/sidebot/notd_list create mode 100644 sidebot/sidebot/number.py create mode 100644 sidebot/sidebot/sidebar.py create mode 100644 sidebot/sidebot/strikebot_updates.py diff --git a/live-autojoin/README.md b/live-autojoin/README.md new file mode 100644 index 0000000..3af2e1d --- /dev/null +++ b/live-autojoin/README.md @@ -0,0 +1,49 @@ +#Architecture + +background.py executed periodically (every 25 min). +service.py is a constantly-running service, uses postgresql asynchronous notifications to detect when there is work to do. +See file headers of the above to learn about their responsibilities. +background.py and service.py act on a "service"; this is sort of a multi-tenant capability. The default service name is "autojoin"; to use a different service name set the environment variable LIVEAUTOJOINSERVICE +service name is also used to find the database via "~/.pg\_service.conf" + +web.py is the Flask (wsgi) web application which handles /authorize, /invite, /ticket. + +flow: +1. user clicks join link (/authorize) in thread sidebar +2. app validates `thread` argument +3. app redirects to reddit OAuth2 /api/v1/authorize, state is : +4. user clicks 'Allow' +5. reddit redirects to /invite +6. app creates ticket linked to authorization code and `NOTIFY live_autojoin` +7. app redirects to /ticket (templates/status.html - auto-refreshing) +7. service.py retrieves authorization code to create authorization +8. service.py fills in ticket username using authorization +9. service.py invites user using admin authorization +10. service.py accepts invite using authorization +11. 
auto-refreshing /ticket starts redirecting back to the live event + +# Roadmap/wishlist +- allowed events: load more than 1 page of search results +- allowed events: search linkpost self text and "thread directory" wiki page for links, not just linkpost links +- when accepting admin invite, update reddit\_liveupdate\_event\_contributor table like when inviting users +- don't mention users in announcements +- respect sr bans +- check whether we will succeed (have permission, have scope, not too many outstanding invites) before adding ticket +- configurable (wiki page) allow/deny list for events +- invite sr moderators with extra permissions +- handle LIVEUPDATE\_TOO\_MANY\_INVITES (or whatever) +- actually report status/errors on /ticket +- handle no admin permission in `invite\_contributor` +- ticket processing rate limit (max 3 tickets in 60 seconds) - and if waiting on ratelimit, say so in status.html +- read modmail (and PMs?) and create tickets for messages with "inviteme" commands +- sync /live/mine (or allowed threads by\_id) to reddit\_liveupdate\_event table (background.py) +- include event title in error page's /message/compose link template +- after accepting admin invite, send PM to event creator (we don't know who created a thread, but we could find out who posted it in /r/livecounting and check if they have `settings` permission) with instructions for adding join link +- remove everyone's "close" permission? +- should be open-sourced, but needs deployment documentation and database schema script and pg\_reddit open-sourced first + +- find and fix DoS opportunities (something invalid submitted through web.py crashes service.py): better now. +- send "/u/username has joined this thread" updates: done. 
+- sync event contributor _invites_, not just contributors: done +- decide how to handle LIVEUPDATE\_ALREADY\_CONTRIBUTOR when `invite\_contributor`ing: done +- speculatively update invite table after `invite\_contributor`, speculatively update contributor table after `accept\_contributor\_invite`: done diff --git a/live-autojoin/announce.py b/live-autojoin/announce.py new file mode 100644 index 0000000..047ba77 --- /dev/null +++ b/live-autojoin/announce.py @@ -0,0 +1,58 @@ +# announce.py - announce new users + +import select +import urllib.parse +import urllib.request +import urllib.error +import base64 +import json + +from common import connect_db, service_name + +POLL_INTERVAL=3600 + +(cn, cr) = connect_db() + +cr.execute("LISTEN live_autojoin") + +while True: + cn.poll() + cn.notifies.clear() + work = False + while True: + cr.execute("BEGIN") + # - invited less than 1 minute ago + # - no announcement of same user in same event less than 30 days ago + # - user has update permission (not banned) + # - admin has update permission + # - admin authorization has submit scope + cr.execute("""SELECT access_token, admin_username, ticket.event_flake, username FROM live_autojoin_ticket AS ticket JOIN live_autojoin_service ON service_name=name JOIN live_autojoin_admin_authorization USING (service_name) JOIN reddit_app_authorization ON authorization_id=id JOIN reddit_liveupdate_event_contributor AS userrel ON userrel."type"='contributor' AND ticket.event_flake=userrel.event_flake AND username=userrel.name JOIN reddit_liveupdate_event_contributor AS adminrel ON adminrel."type"='contributor' AND ticket.event_flake = adminrel.event_flake AND admin_username = adminrel.name WHERE service_name = %s AND status = 'ok' AND updated_at + '1m' > CURRENT_TIMESTAMP AND NOT EXISTS (SELECT * FROM live_autojoin_announcement WHERE for_username = username AND event_flake = ticket.event_flake AND at + '30d' > updated_at) AND has_permission('update', userrel.permissions) AND 
has_permission('update', adminrel.permissions) AND array_position(scope, 'submit') IS NOT NULL LIMIT 1""", (service_name,)) + try: + [(access_token, admin_username, event_flake, username)] = cr.fetchall() + except ValueError: + break + else: + work = True + escaped_username = username.replace('_', '\\_') + usertext = f'*[\\/u\\/{ escaped_username }](/user/{ escaped_username }) has joined this thread*' + body = urllib.parse.urlencode({ 'api_type': 'json', 'body': usertext }).encode('utf-8') + req = urllib.request.Request(f'https://oauth.reddit.com/api/live/{event_flake}/update', data=body, method='POST') + req.add_header('Authorization', 'Bearer {}'.format(access_token)) + req.add_header('User-Agent', 'autojoin/0.1.0') + try: + res = json.load(urllib.request.urlopen(req)) + except urllib.error.HTTPError as e: + if e.code == 403: + admin_username = None + usertext = None + else: + raise + cr.execute("INSERT INTO live_autojoin_announcement (at, event_flake, for_username, author, body) VALUES (CURRENT_TIMESTAMP, %s, %s, %s, %s)", (event_flake, username, admin_username, usertext)) + finally: + cn.commit() + + cn.poll() + if work or len(cn.notifies) > 0: + continue + + select.select([cn], [], [], POLL_INTERVAL) diff --git a/live-autojoin/background.py b/live-autojoin/background.py new file mode 100644 index 0000000..f5a9a87 --- /dev/null +++ b/live-autojoin/background.py @@ -0,0 +1,180 @@ +# background.py - jobs that are not time-critical. +# - refresh access tokens +# - sync PMs +# - sync allowed threads (subreddit links) +# - accept admin invites +# - sync contributor lists +# - TODO sync subreddit bans and moderators +# - TODO sync modmail (to receive joining requests to modmail) + +# roadmap +# - for PM, allowed threads, and modmail sync, create a lightweight version to run frequently to handle new events quickly--distinct from the more expensive "full sync" that is implemented here. 
+ +import urllib.request +import urllib.parse + +import json +import base64 +import re + +from common import connect_db, service_name + +# +# refresh tokens +# + +(cn, cr) = connect_db() + +def do_refresh_token(client_id, client_secret, refresh_token): + body = urllib.parse.urlencode({ 'grant_type': 'refresh_token', 'refresh_token': refresh_token }).encode('utf-8') + req = urllib.request.Request('https://www.reddit.com/api/v1/access_token', data=body, method='POST') + auth = base64.b64encode('{}:{}'.format(client_id, client_secret).encode('utf-8')).decode('utf-8') + req.add_header('Authorization', 'Basic {}'.format(auth)) + req.add_header('User-Agent', 'autojoin/0.1.0') + res = urllib.request.urlopen(req) + return json.load(res) + +while True: + cr.execute("BEGIN") + cr.execute("SELECT authorization_id, client_id, client_secret, refresh_token FROM live_autojoin_admin_authorization JOIN reddit_app_authorization ON authorization_id=id JOIN reddit_app USING (client_id) WHERE service_name = %s AND refresh_token IS NOT NULL AND expires < CURRENT_TIMESTAMP + '30min' LIMIT 1 FOR NO KEY UPDATE OF reddit_app_authorization", [service_name]) + try: + [(authorization_id, client_id, client_secret, refresh_token)] = cr.fetchall() + except ValueError: + cn.rollback() + break + else: + print('refreshing token for authorization {}'.format(authorization_id)) + new_token = do_refresh_token(client_id, client_secret, refresh_token) + cr.execute("UPDATE reddit_app_authorization SET access_token = %s, refresh_token = %s, scope = regexp_split_to_array(%s, ' ')::reddit_app_scope[], expires = CURRENT_TIMESTAMP + make_interval(secs => %s) WHERE id = %s", (new_token['access_token'], new_token['refresh_token'], new_token['scope'], new_token['expires_in'], authorization_id)) + cn.commit() + +# +# load PMs +# + +def flatten_privatemessage_listing(json_): + assert json_['kind'] == 'Listing' + for p in json_['data']['children']: + assert p['kind'] == 't4' + yield p['data'] + replies = 
p['data']['replies'] + if replies: + assert replies['kind'] == 'Listing' + for r in replies['data']['children']: + assert p['kind'] == 't4' + yield p['data'] + +def privatemessage_to_tuple(data): + id_ = data['id'] + parent_id = None + if data['parent_id'] is not None: + parent_id = data['parent_id'].split('_',maxsplit=1)[1] + assert int(parent_id, 36) == data['first_message'] + created = data['created_utc'] + sr = data['subreddit'] + author = None if data['author'] == '[deleted]' else data['author'] + if data['dest'].startswith('#'): + # modmail (destination is subreddit) + dest = None + dest_is_sr = True + else: + # destination is user + dest = data['dest'] + dest_is_sr = False + subject = data['subject'] + body = data['body'] + return (id_, parent_id, created, sr, author, dest, dest_is_sr, subject, body) + +cr.execute("BEGIN") +cr.execute("SELECT sr, access_token FROM live_autojoin_service JOIN live_autojoin_admin_authorization ON name = service_name JOIN reddit_app_authorization ON authorization_id=id JOIN reddit_app ON reddit_app.client_id=reddit_app_authorization.client_id WHERE service_name = %s", [service_name]) +[(sr, access_token)] = cr.fetchall() +cr.execute('CREATE TEMPORARY TABLE privatemessage_load (LIKE reddit_privatemessage INCLUDING INDEXES)') +after = None +while True: + if after: + rsc = 'https://oauth.reddit.com/message/messages?raw_json=1&limit=100&after={}'.format(after) + else: + rsc = 'https://oauth.reddit.com/message/messages?raw_json=1&limit=100' + req = urllib.request.Request(rsc, method='GET') + req.add_header('Authorization', 'Bearer {}'.format(access_token)) + req.add_header('User-Agent', 'autojoin/0.1.0') + res = json.load(urllib.request.urlopen(req)) + tuples = (privatemessage_to_tuple(d) for d in flatten_privatemessage_listing(res)) + cr.executemany('INSERT INTO privatemessage_load (id, parent_id, created, sr, author, dest, dest_is_sr, subject, body) VALUES (%s, %s, to_timestamp(%s), %s, %s, %s, %s, %s, %s)', tuples) + if 'after' in 
res: + after = res['after'] + else: + break +cr.execute("INSERT INTO reddit_privatemessage (id, parent_id, created, sr, author, dest, dest_is_sr, subject, body) SELECT id, parent_id, created, sr, author, dest, dest_is_sr, subject, body FROM privatemessage_load ON CONFLICT ON CONSTRAINT reddit_privatemessage_pkey DO NOTHING") +cr.execute("DROP TABLE privatemessage_load") +cn.commit() + +# +# build allowed thread list +# TODO look beyond first page +# TODO wiki config page for additional threads +# + +def flake_from_url(url_): + result = re.fullmatch('https?://[a-z]+\.reddit\.com/live/([a-z0-9]+)/?(?:\?.*)?', url_) + return result and result.group(1) + +def allowed_threads(): + req = urllib.request.Request('https://oauth.reddit.com/r/livecounting/search?q=url%3Alive+site%3Areddit.com+self%3Ano&restrict_sr=on&include_over_18=on&sort=new&t=all&limit=100', method='GET') + req.add_header('Authorization', 'Bearer {}'.format(access_token)) + req.add_header('User-Agent', 'autojoin/0.1.0') + res = json.load(urllib.request.urlopen(req)) + flakes = (flake_from_url(thing['data']['url']) for thing in res['data']['children'] if thing['data']['is_self'] is False) + return set((f for f in flakes if f)) + +cr.execute("BEGIN") +#cr.execute("DELETE FROM live_autojoin_allowed_event WHERE service_name = %s", (service_name,)) +cr.executemany("INSERT INTO live_autojoin_allowed_event (service_name, event_flake) VALUES (%s, %s) ON CONFLICT (service_name, event_flake) DO NOTHING", ((service_name, f) for f in allowed_threads())) +cn.commit() + +# accept admin invites + +cr.execute("BEGIN") +cr.execute("SELECT id, body FROM reddit_privatemessage JOIN live_autojoin_service ON dest = admin_username WHERE author = 'reddit' AND NOT dest_is_sr AND parent_id IS NULL AND subject LIKE 'invitation to contribute to %%' AND NOT EXISTS (SELECT * FROM live_autojoin_admin_invite WHERE privatemessage_id = id AND name = %s)", (service_name,)) +for (id_, body) in cr.fetchall(): + match = 
re.search('/live/([a-z0-9]+)/contributors', body) + event_flake = match and match.group(1) + result = None + if event_flake: + body = urllib.parse.urlencode({ 'api_type': 'json' }).encode('utf-8') + req = urllib.request.Request('https://oauth.reddit.com/api/live/{}/accept_contributor_invite'.format(event_flake), method='POST', data=body) + req.add_header('Authorization', 'Bearer {}'.format(access_token)) + req.add_header('User-Agent', 'autojoin/0.1.0') + urllib.request.urlopen(req) + result = 'ok' + cr.execute("INSERT INTO live_autojoin_admin_invite (privatemessage_id, event_flake, result) VALUES (%s, %s, %s)", [id_, event_flake, result]) +cn.commit() + +# load contributor lists + +cr.execute("BEGIN") +cr.execute("SELECT event_flake FROM live_autojoin_allowed_event WHERE service_name = %s", (service_name,)) +cn.commit() +for (event_flake,) in cr.fetchall(): + req = urllib.request.Request('https://oauth.reddit.com/live/{}/contributors'.format(event_flake), method='GET') + req.add_header('Authorization', 'Bearer {}'.format(access_token)) + req.add_header('User-Agent', 'autojoin/0.1.0') + contributors_lists = json.load(urllib.request.urlopen(req)) + # endpoint returns two listings, contributors and contributor _invites_, in JSON sequence. + # if not a contributor, cannot see contributor invites. + # in that case, no JSON sequence--simply the single listing is returned--annoying. 
+ if isinstance(contributors_lists, dict): + empty_listing = {'kind': 'UserList', 'data': {'children': []}} + contributors_lists = [contributors_lists, empty_listing] + assert all((l['kind'] == 'UserList' for l in contributors_lists)) + contributors = ((event_flake, c['name'], '{{{}}}'.format(','.join(c['permissions'])), ty) for (ty, l) in zip(('contributor', 'invite'), contributors_lists) for c in l['data']['children']) + cr.execute("BEGIN") + cr.execute("DELETE FROM reddit_liveupdate_event_contributor WHERE event_flake = %s", (event_flake,)) + cr.executemany("INSERT INTO reddit_liveupdate_event_contributor (event_flake, name, permissions, \"type\") VALUES (%s, %s, %s::text[], %s)", contributors) + cn.commit() + print(event_flake) + +# TODO load subreddit bans (and moderators) +# TODO load modmail for joining requests + +cn.close() diff --git a/live-autojoin/common.py b/live-autojoin/common.py new file mode 100644 index 0000000..0e032d3 --- /dev/null +++ b/live-autojoin/common.py @@ -0,0 +1,9 @@ +import psycopg2 +import os + +service_name = os.environ.get('LIVEAUTOJOINSERVICE', 'autojoin') + +def connect_db(): + cn = psycopg2.connect("service={}".format(service_name)) + cr = cn.cursor() + return (cn, cr) diff --git a/live-autojoin/deploy b/live-autojoin/deploy new file mode 100755 index 0000000..eceeb7a --- /dev/null +++ b/live-autojoin/deploy @@ -0,0 +1,5 @@ +#!/bin/bash + +echo "Ok, but why aren't you using a package manager" + +tar cz {cert,privkey}.pem {background,service,announce,common,web}.py templates systemd | ssh root@hatnd.acorn.pw tar xzC /opt/live-autojoin diff --git a/live-autojoin/service.py b/live-autojoin/service.py new file mode 100644 index 0000000..a56c7b1 --- /dev/null +++ b/live-autojoin/service.py @@ -0,0 +1,154 @@ +# service.py - invite processing + +#- exchange authentication codes +#- fetch authorization identity to populate ticket username +#- invite users +#- accept invites + +import select +import urllib.parse +import urllib.request 
+import urllib.error +import base64 +import json + +from common import connect_db, service_name + +POLL_INTERVAL=3600 + +(cn, cr) = connect_db() + +cr.execute("LISTEN live_autojoin") + +while True: + cn.poll() + cn.notifies.clear() + work = False + while True: + cr.execute("BEGIN") + cr.execute("SELECT client_id, client_secret, redirect_uri, id, code FROM live_autojoin_service JOIN reddit_app USING (client_id) JOIN live_autojoin_ticket ON name = service_name JOIN reddit_app_authorization_code ON id = authorization_code_id WHERE authorization_id IS NULL AND service_name = %s LIMIT 1 FOR UPDATE OF live_autojoin_ticket, reddit_app_authorization_code", (service_name,)) + try: + [(client_id, client_secret, redirect_uri, authorization_code_id, code)] = cr.fetchall() + except ValueError: + break + else: + work = True + body = urllib.parse.urlencode({ 'grant_type': 'authorization_code', 'code': code, 'redirect_uri': redirect_uri }).encode('utf-8') + req = urllib.request.Request('https://www.reddit.com/api/v1/access_token', data=body, method='POST') + auth = base64.b64encode('{}:{}'.format(client_id, client_secret).encode('utf-8')).decode('utf-8') + req.add_header('Authorization', 'Basic {}'.format(auth)) + req.add_header('User-Agent', 'autojoin/0.1.0') + res = json.load(urllib.request.urlopen(req)) + if 'error' in res: + if res['error'] == 'invalid_grant': + cr.execute("DELETE FROM reddit_app_authorization_code WHERE id = %s", (authorization_code_id,)) + else: + raise ValueError("unrecognized error '{}'".format(res['error'])) + else: + assert res['token_type'] == 'bearer' + cr.execute("WITH q1 AS (INSERT INTO reddit_app_authorization (client_id, access_token, scope, expires) VALUES (%s, %s, regexp_split_to_array(%s, ' ')::reddit_app_scope[], CURRENT_TIMESTAMP + make_interval(secs => %s)) RETURNING id) UPDATE reddit_app_authorization_code SET authorization_id = q1.id FROM q1 WHERE reddit_app_authorization_code.id = %s", (client_id, res['access_token'], res['scope'], 
res['expires_in'], authorization_code_id)) + finally: + cn.commit() + + while True: + cr.execute("BEGIN") + cr.execute("SELECT reddit_app_authorization.id, access_token, ticket FROM live_autojoin_ticket JOIN reddit_app_authorization_code ON reddit_app_authorization_code.id=authorization_code_id JOIN reddit_app_authorization ON reddit_app_authorization.id = authorization_id WHERE service_name = %s AND username IS NULL AND array_position(scope, 'identity') IS NOT NULL LIMIT 1 FOR UPDATE OF live_autojoin_ticket", (service_name,)) + try: + [(authorization_id, access_token, ticket)] = cr.fetchall() + except ValueError: + break + else: + work = True + req = urllib.request.Request('https://oauth.reddit.com/api/v1/me', method='GET') + req.add_header('Authorization', 'Bearer {}'.format(access_token)) + req.add_header('User-Agent', 'autojoin/0.1.0') + try: + res = json.load(urllib.request.urlopen(req)) + except urllib.error.HTTPError as e: + if e.code == 401: + cr.execute("DELETE FROM reddit_app_authorization WHERE id = %s", (authorization_id,)) + else: + raise + else: + cr.execute("UPDATE live_autojoin_ticket SET username = %s WHERE service_name = %s AND ticket = %s", (res['name'], service_name, ticket)) + finally: + cn.commit() + + while True: + cr.execute("BEGIN") + cr.execute("SELECT access_token, ticket, event_flake, username FROM live_autojoin_ticket LEFT OUTER JOIN LATERAL (SELECT service_name, access_token FROM live_autojoin_admin_authorization JOIN reddit_app_authorization ON id=authorization_id WHERE service_name = live_autojoin_ticket.service_name LIMIT 1) q1 USING (service_name) WHERE service_name = %s AND username IS NOT NULL AND status IS NULL LIMIT 1 FOR NO KEY UPDATE OF live_autojoin_ticket", (service_name,)) + try: + [(access_token, ticket, event, username)] = cr.fetchall() + except ValueError: + break + else: + work = True + if access_token is None: + result = 'noadmin' + else: + body = urllib.parse.urlencode({ 'api_type': 'json', 'name': username, 
'permissions': '+update', 'type': 'liveupdate_contributor_invite' }).encode('utf-8') + req = urllib.request.Request('https://oauth.reddit.com/api/live/{}/invite_contributor'.format(event), data=body, method='POST') + req.add_header('Authorization', 'Bearer {}'.format(access_token)) + req.add_header('User-Agent', 'autojoin/0.1.0') + try: + res = json.load(urllib.request.urlopen(req)) + except urllib.error.HTTPError as e: + if e.code == 403: + result = 'noadmin' + else: + raise + else: + errors = [er[0] for er in res['json']['errors']] + if not errors: + result = 'invited' + elif errors == ['LIVEUPDATE_ALREADY_CONTRIBUTOR']: + result = 'already_contributor_or_invited' + else: + raise RuntimeError(str(errors)) + if result == 'invited': + cr.execute("DELETE FROM reddit_liveupdate_event_contributor WHERE event_flake = %s AND name = %s", (event, username)) + cr.execute("INSERT INTO reddit_liveupdate_event_contributor (event_flake, name, permissions, type) VALUES (%s, %s, liveupdate_permission_set '+update', 'invite') ON CONFLICT DO NOTHING", (event, username)) + cr.execute("UPDATE live_autojoin_ticket SET status = %s, updated_at = CURRENT_TIMESTAMP WHERE service_name = %s AND ticket = %s", (result, service_name, ticket)) + finally: + cn.commit() + + while True: + cr.execute("BEGIN") + cr.execute("SELECT reddit_app_authorization.id, access_token, ticket, event_flake, username, status FROM live_autojoin_ticket JOIN reddit_app_authorization_code ON reddit_app_authorization_code.id=authorization_code_id JOIN reddit_app_authorization ON reddit_app_authorization.id = authorization_id WHERE service_name = %s AND status IN ('invited', 'already_contributor_or_invited') AND array_position(scope, 'livemanage') IS NOT NULL LIMIT 1 FOR NO KEY UPDATE OF live_autojoin_ticket", (service_name,)) + try: + [(authorization_id, access_token, ticket, event_flake, username, status)] = cr.fetchall() + except ValueError: + break + else: + work = True + body = urllib.parse.urlencode({ 'api_type': 
'json' }).encode('utf-8') + req = urllib.request.Request('https://oauth.reddit.com/api/live/{}/accept_contributor_invite'.format(event_flake), data=body, method='POST') + req.add_header('Authorization', 'Bearer {}'.format(access_token)) + req.add_header('User-Agent', 'autojoin/0.1.0') + try: + res = json.load(urllib.request.urlopen(req)) + except urllib.error.HTTPError as e: + if e.code == 401: + cr.execute("DELETE FROM reddit_app_authorization WHERE id = %s", (authorization_id,)) + else: + raise + else: + errors = [er[0] for er in res['json']['errors']] + if not errors: + result = 'ok' + elif errors == ['LIVEUPDATE_NO_INVITE_FOUND']: + result = 'already_contributor' if status == 'already_contributor_or_invited' else None + else: + raise RuntimeError(str(errors)) + if result == 'ok': + cr.execute("UPDATE reddit_liveupdate_event_contributor SET type = 'contributor' WHERE event_flake = %s AND name = %s", (event, username)) + cr.execute("NOTIFY live_autojoin") # for announcements + cr.execute("UPDATE live_autojoin_ticket SET status = %s, updated_at = CURRENT_TIMESTAMP WHERE service_name = %s AND ticket = %s", (result, service_name, ticket)) + finally: + cn.commit() + + cn.poll() + if work or len(cn.notifies) > 0: + continue + + select.select([cn], [], [], POLL_INTERVAL) diff --git a/live-autojoin/systemd/live-autojoin-announce.service b/live-autojoin/systemd/live-autojoin-announce.service new file mode 100644 index 0000000..e00569c --- /dev/null +++ b/live-autojoin/systemd/live-autojoin-announce.service @@ -0,0 +1,8 @@ +[Service] +WorkingDirectory=/opt/live-autojoin +User=counting +Group=counting +Environment=LIVEAUTOJOINSERVICE=autojoin +Type=simple +ExecStart=python3 announce.py +Restart=always diff --git a/live-autojoin/systemd/live-autojoin-cron.service b/live-autojoin/systemd/live-autojoin-cron.service new file mode 100644 index 0000000..929dbe3 --- /dev/null +++ b/live-autojoin/systemd/live-autojoin-cron.service @@ -0,0 +1,7 @@ +[Service] 
+WorkingDirectory=/opt/live-autojoin +User=counting +Group=counting +Environment=LIVEAUTOJOINSERVICE=autojoin +Type=oneshot +ExecStart=python3 background.py diff --git a/live-autojoin/systemd/live-autojoin-cron.timer b/live-autojoin/systemd/live-autojoin-cron.timer new file mode 100644 index 0000000..5843509 --- /dev/null +++ b/live-autojoin/systemd/live-autojoin-cron.timer @@ -0,0 +1,3 @@ +[Timer] +OnActiveSec=0 +OnUnitActiveSec=25min diff --git a/live-autojoin/systemd/live-autojoin-service.service b/live-autojoin/systemd/live-autojoin-service.service new file mode 100644 index 0000000..d69c607 --- /dev/null +++ b/live-autojoin/systemd/live-autojoin-service.service @@ -0,0 +1,8 @@ +[Service] +WorkingDirectory=/opt/live-autojoin +User=counting +Group=counting +Environment=LIVEAUTOJOINSERVICE=autojoin +Type=simple +ExecStart=python3 service.py +Restart=always diff --git a/live-autojoin/systemd/live-autojoin-web.service b/live-autojoin/systemd/live-autojoin-web.service new file mode 100644 index 0000000..7b1ebba --- /dev/null +++ b/live-autojoin/systemd/live-autojoin-web.service @@ -0,0 +1,7 @@ +[Service] +WorkingDirectory=/opt/live-autojoin +User=counting +Group=counting +Environment=SCRIPT_NAME=/autojoin LIVEAUTOJOINSERVICE=autojoin +Type=simple # `notify` if gunicorn >= 20 +ExecStart=/usr/bin/gunicorn3 web:app diff --git a/live-autojoin/systemd/live-autojoin-web.socket b/live-autojoin/systemd/live-autojoin-web.socket new file mode 100644 index 0000000..be9f1a0 --- /dev/null +++ b/live-autojoin/systemd/live-autojoin-web.socket @@ -0,0 +1,4 @@ +[Socket] +ListenStream=/tmp/live-autojoin.socket +SocketUser=www-data +SocketGroup=www-data diff --git a/live-autojoin/systemd/live-autojoin.target b/live-autojoin/systemd/live-autojoin.target new file mode 100644 index 0000000..a028573 --- /dev/null +++ b/live-autojoin/systemd/live-autojoin.target @@ -0,0 +1,5 @@ +[Unit] +Requires=live-autojoin-cron.timer live-autojoin-service.service live-autojoin-web.socket 
live-autojoin-announce.service + +[Install] +WantedBy=multi-user.target diff --git a/live-autojoin/templates/error.html b/live-autojoin/templates/error.html new file mode 100644 index 0000000..d393326 --- /dev/null +++ b/live-autojoin/templates/error.html @@ -0,0 +1,10 @@ + + + + Autojoin: Error + + +

Error: {% block short %}unknown{% endblock %}

+ {% block long %}

Please message the moderators for assistance.

{% endblock %} + + diff --git a/live-autojoin/templates/status.html b/live-autojoin/templates/status.html new file mode 100644 index 0000000..dd87bea --- /dev/null +++ b/live-autojoin/templates/status.html @@ -0,0 +1,13 @@ + + + + Autojoin: Pending + + + +

Inviting you to this thread

+

This should only take a few seconds. If many users are trying to join right now, you might have to wait longer.

+

You will be redirected back to the live thread once you have been added.

+

If this message persists, please message the moderators for assistance.

+ + diff --git a/live-autojoin/web.py b/live-autojoin/web.py new file mode 100644 index 0000000..819cf97 --- /dev/null +++ b/live-autojoin/web.py @@ -0,0 +1,105 @@ +from flask import Flask, request, redirect, render_template, url_for + +import secrets +import re +import urllib.parse + +from common import connect_db + +DEFAULT_SERVICE = 'autojoin' + +app = Flask(__name__) + +def make_oauth_url(service_name, client_id, event, redirect_uri): + state = '{}:{}'.format(service_name, event) + scope = ' '.join(['identity','livemanage']) + qs = urllib.parse.urlencode({ + 'client_id': client_id, + 'response_type': 'code', + 'state': state, + 'redirect_uri': redirect_uri, + 'scope': scope, + 'duration': 'temporary' + }) + return 'https://www.reddit.com/api/v1/authorize?{}'.format(qs) + +def make_privatemessage_url(service_name, event): + if event and re.match('[a-z0-9]{10,}$', event): + body = '''I would like to join this thread: https://www.reddit.com/live/{1} + +(If you send this message with the following line intact, you will be invited automatically if possible) + +/autojoin service {0} event {1}'''.format(service_name, event) + elif event: + body = '''I would like to join this thread: {}'''.format(event) + else: + body = '''I would like to join this thread: (none)''' + assert re.match('[a-z0-9]+$', service_name) + qs = urllib.parse.urlencode({ + 'to': '/r/livecounting', + 'subject': 'Please invite me', + 'message': body + }) + return 'https://www.reddit.com/message/compose?{}'.format(qs) + +@app.route('/authorize') +def authorize(): + service_name = request.args.get('service', DEFAULT_SERVICE) + event = request.args.get('thread') + (cn, cr) = connect_db() + cr.execute("SELECT name, client_id, event_flake, redirect_uri FROM live_autojoin_allowed_event JOIN live_autojoin_service ON name = service_name JOIN reddit_app USING (client_id) WHERE service_name = %s AND event_flake = %s", (service_name, event)) + try: + [(service_name, client_id, event, redirect_uri)] = 
cr.fetchall() + except ValueError: + privatemessage_url = make_privatemessage_url(service_name, event) + return render_template("error.html", privatemessage_url=privatemessage_url) + else: + return redirect(make_oauth_url(service_name, client_id, event, redirect_uri), code=303) + finally: + cn.close() + +@app.route('/invite') +def invite(): + authorization_code = request.args.get('code') + state = request.args.get('state') + (service_name, event_flake) = state.split(':') + ticket = secrets.token_urlsafe() + (cn, cr) = connect_db() + cr.execute("BEGIN") + cr.execute("WITH q1 AS (INSERT INTO reddit_app_authorization_code (state, code) VALUES (%s, %s) RETURNING id) INSERT INTO live_autojoin_ticket (service_name, ticket, issued_at, event_flake, authorization_code_id, status) SELECT %s, %s, CURRENT_TIMESTAMP, %s, id, NULL FROM q1", (state, authorization_code, service_name, ticket, event_flake)) + cr.execute("NOTIFY live_autojoin") + cn.commit() + cn.close() + return redirect(url_for('ticket', service=service_name, ticket=ticket), code=303) + +@app.route('/ticket') +def ticket(): + service_name = request.args.get('service') + ticket = request.args.get('ticket') + (cn, cr) = connect_db() + cr.execute("SELECT event_flake, status FROM live_autojoin_ticket WHERE service_name = %s AND ticket = %s", (service_name, ticket)) + try: + [(event, status)] = cr.fetchall() + except ValueError: + event = None + status = 'error' + cn.close() + if status in ('ok', 'already_contributor'): + return redirect('https://www.reddit.com/live/{}'.format(event), code=308) + elif status == 'error': + privatemessage_url = make_privatemessage_url(service_name, event) + return render_template("error.html", privatemessage_url=privatemessage_url) + else: + privatemessage_url = make_privatemessage_url(service_name, event) + return render_template("status.html", privatemessage_url=privatemessage_url) + +# TODO +# - unallowed thread +# - allowed but disabled thread +# - authorization denied +# - indication 
def main():
    """One sidebot pass: pull new live-thread updates since the stored
    checkpoint and patch the thread's sidebar sections accordingly."""

    # retrieve run info

    parser = ArgumentParser()
    parser.add_argument('service_name')
    args = parser.parse_args()

    USER_AGENT = 'sidebot/0.1.0'
    MAX_PAGES = 3

    # '' connection string: psycopg2 reads PG* environment / pg_service config
    dbconn = psycopg2.connect('')
    db = dbconn.cursor()
    # FOR NO KEY UPDATE row lock serializes concurrent sidebot runs for this
    # service; freeze_after = now - freeze_age is the cutoff below which an
    # update is considered settled and may advance the checkpoint.
    db.execute('''
        SELECT
            service_name, event_id, last_update_id,
            CURRENT_TIMESTAMP-COALESCE(freeze_age, '5min'),
            COALESCE(keep_kget, 10),
            COALESCE(keep_notd, 10),
            access_token
        FROM sidebot_service t0
        JOIN reddit_app_authorization t1 ON t0.authorization_id=t1.id
        WHERE service_name = %s AND CURRENT_TIMESTAMP < expires
        FOR NO KEY UPDATE OF t0
        ''', (args.service_name,))
    # exactly one row expected; the single-element unpack raises ValueError otherwise
    [ (service_name, event_id, last_update_id, freeze_after, keep_kget, keep_notd, access_token) ] = db.fetchall()
    last_update_dirty = False

    # walk unread updates
    updates = []
    stopped_short = False
    # page backwards-in-listing (newer than checkpoint) via reddit's `before`
    # cursor; empty before_arg (no checkpoint yet) fetches one newest page only
    before_arg = "LiveUpdate_{}".format(last_update_id) if last_update_id else ""
    for page_no in range(MAX_PAGES):
        res = urllib.request.urlopen(urllib.request.Request(
            'https://www.reddit.com/live/{}.json?raw_json=1&limit=100&before={}'.format(
                event_id, before_arg
            ), headers={"User-Agent": USER_AGENT}))
        if res.status != 200:
            raise RuntimeError('HTTP {} {}'.format(res.status, res.reason))
        page = json.load(res)['data']['children']
        # page is newest-first; reversed() keeps `updates` chronological
        updates.extend((i['data'] for i in reversed(page)))
        if before_arg != "" and len(page) > 0:
            before_arg = page[0]['data']['name']
        else:
            break
    else:
        # for-else: MAX_PAGES exhausted without draining — there may be more
        # unread updates; banner/checkpoint handling below is conservative
        stopped_short = True

    # update sidebar
    res = urllib.request.urlopen(urllib.request.Request(
        'https://oauth.reddit.com/live/{}/about?raw_json=1'.format(event_id),
        headers={"Authorization": "Bearer {}".format(access_token),
            "User-Agent": USER_AGENT}))
    if res.status != 200:
        raise RuntimeError('HTTP {} {}'.format(res.status, res.reason))
    data = json.load(res)['data']
    resources = data['resources']
    nsfw_arg = str(int(data['nsfw']))  # NOTE(review): unused — the edit request below recomputes this inline

    (_, kget_slice, kget) = find_parse_section(resources, 'GET',
        event_id=event_id, retain=keep_kget)
    (notd_pos, notd_slice, notd) = find_parse_section(resources, 'Number of the Day',
        event_id=event_id, retain=keep_notd)
    if len(updates) > 0 and not stopped_short:
        # only rewrite the NOTD banner when we know we saw the newest update
        now = datetime.fromtimestamp(updates[-1]['created_utc'], tz=timezone.utc)
        (notd_banner_slice, notd_banner) = notd_banner_patch(resources, notd_pos, now)

    now_ts = None
    # time1 tracks the previous update's UUIDv1 timestamp for gap detection
    time1 = UUID(last_update_id).time if last_update_id is not None else None
    for u in updates:
        ts = datetime.fromtimestamp(u['created_utc'], tz=timezone.utc)
        u_uuid = UUID(u['id'])
        if ts < freeze_after:
            # old enough to be settled: safe to advance the checkpoint past it
            last_update_id = u['id']
            last_update_dirty = True
        if now_ts is None or ts > now_ts:
            now_ts = ts

        # delete lines for missing (id between previous and current update) (hence deleted) updates
        if time1 is not None:
            for processor in [kget, notd]:
                processor.delete_between(time1, u_uuid.time)
        time1 = u_uuid.time

        n = number_from_update(u)
        delete_matching = u['stricken']
        for (processor, is_special) in [(kget, is_kget), (notd, is_notd)]:
            found_matching = processor.take(u_uuid, delete_matching)
            if n is not None and is_special(ts, n) and not u['stricken'] and not found_matching:
                processor.insert(u, n)

    def patch_text(text, patches):
        # apply patches in descending start order so earlier slices stay valid
        for (slice_, new_text) in sorted(patches, key=lambda patch: -patch[0].start):
            text = text[:slice_.start] + new_text + text[slice_.stop:]
        return text

    new_resources = patch_text(resources, itertools.chain((
        (slice_, proc.text()) for (slice_, proc) in [
            (kget_slice, kget),
            (notd_slice, notd)
        ] if proc.dirty),
        # 0-or-1 element generator; the banner names are only evaluated lazily,
        # and only when the same guard that bound them above held true
        ((notd_banner_slice, notd_banner) for d in range(int(not stopped_short and now_ts is not None)))
    ))

    if new_resources != resources:
        # NOTE(review): urllib.parse is reached through `import urllib.request`
        # importing it internally (CPython detail) — consider importing it explicitly
        req = urllib.request.Request(
            'https://oauth.reddit.com/live/{}/edit'.format(event_id),
            data=urllib.parse.urlencode({
                'api_type': 'json',
                'title': data['title'],
                'description': data['description'],
                'resources': new_resources,
                'nsfw': str(int(data['nsfw']))
            }).encode(),
            headers={
                'User-Agent': USER_AGENT,
                'Authorization': 'Bearer {}'.format(access_token)
            },
            method='POST'
        )
        res = urllib.request.urlopen(req)
        if res.status != 200:
            raise RuntimeError('HTTP {} {}'.format(res.status, res.reason))

    # update run info
    if last_update_dirty:
        db.execute('''
            UPDATE sidebot_service SET last_update_id=%s WHERE service_name=%s
        ''', (last_update_id, service_name))
    dbconn.commit()
100644 index 0000000..cf6b6a6 --- /dev/null +++ b/sidebot/sidebot/notd_list @@ -0,0 +1,1000 @@ +2022-03-24 555 +2022-03-25 792 +2022-03-26 961 +2022-03-27 395 +2022-03-28 813 +2022-03-29 489 +2022-03-30 861 +2022-03-31 243 +2022-04-01 700 +2022-04-02 320 +2022-04-03 635 +2022-04-04 494 +2022-04-05 666 +2022-04-06 976 +2022-04-07 288 +2022-04-08 298 +2022-04-09 444 +2022-04-10 650 +2022-04-11 667 +2022-04-12 921 +2022-04-13 279 +2022-04-14 825 +2022-04-15 718 +2022-04-16 274 +2022-04-17 623 +2022-04-18 695 +2022-04-19 355 +2022-04-20 341 +2022-04-21 751 +2022-04-22 112 +2022-04-23 050 +2022-04-24 280 +2022-04-25 628 +2022-04-26 477 +2022-04-27 540 +2022-04-28 629 +2022-04-29 070 +2022-04-30 954 +2022-05-01 411 +2022-05-02 008 +2022-05-03 184 +2022-05-04 219 +2022-05-05 412 +2022-05-06 450 +2022-05-07 396 +2022-05-08 701 +2022-05-09 716 +2022-05-10 442 +2022-05-11 619 +2022-05-12 648 +2022-05-13 625 +2022-05-14 196 +2022-05-15 518 +2022-05-16 443 +2022-05-17 574 +2022-05-18 359 +2022-05-19 855 +2022-05-20 539 +2022-05-21 257 +2022-05-22 194 +2022-05-23 586 +2022-05-24 264 +2022-05-25 333 +2022-05-26 820 +2022-05-27 462 +2022-05-28 174 +2022-05-29 440 +2022-05-30 466 +2022-05-31 431 +2022-06-01 111 +2022-06-02 513 +2022-06-03 672 +2022-06-04 641 +2022-06-05 381 +2022-06-06 269 +2022-06-07 758 +2022-06-08 679 +2022-06-09 979 +2022-06-10 297 +2022-06-11 517 +2022-06-12 527 +2022-06-13 521 +2022-06-14 088 +2022-06-15 091 +2022-06-16 575 +2022-06-17 844 +2022-06-18 707 +2022-06-19 636 +2022-06-20 734 +2022-06-21 334 +2022-06-22 329 +2022-06-23 886 +2022-06-24 469 +2022-06-25 725 +2022-06-26 158 +2022-06-27 225 +2022-06-28 831 +2022-06-29 958 +2022-06-30 832 +2022-07-01 991 +2022-07-02 508 +2022-07-03 993 +2022-07-04 035 +2022-07-05 382 +2022-07-06 027 +2022-07-07 594 +2022-07-08 083 +2022-07-09 053 +2022-07-10 046 +2022-07-11 694 +2022-07-12 816 +2022-07-13 840 +2022-07-14 255 +2022-07-15 018 +2022-07-16 137 +2022-07-17 901 +2022-07-18 572 +2022-07-19 276 +2022-07-20 332 
+2022-07-21 163 +2022-07-22 305 +2022-07-23 726 +2022-07-24 826 +2022-07-25 026 +2022-07-26 780 +2022-07-27 585 +2022-07-28 670 +2022-07-29 994 +2022-07-30 793 +2022-07-31 785 +2022-08-01 364 +2022-08-02 788 +2022-08-03 801 +2022-08-04 267 +2022-08-05 419 +2022-08-06 982 +2022-08-07 913 +2022-08-08 974 +2022-08-09 087 +2022-08-10 804 +2022-08-11 658 +2022-08-12 080 +2022-08-13 189 +2022-08-14 709 +2022-08-15 497 +2022-08-16 475 +2022-08-17 148 +2022-08-18 213 +2022-08-19 795 +2022-08-20 875 +2022-08-21 438 +2022-08-22 817 +2022-08-23 537 +2022-08-24 222 +2022-08-25 806 +2022-08-26 100 +2022-08-27 676 +2022-08-28 304 +2022-08-29 871 +2022-08-30 217 +2022-08-31 535 +2022-09-01 953 +2022-09-02 786 +2022-09-03 435 +2022-09-04 453 +2022-09-05 653 +2022-09-06 891 +2022-09-07 480 +2022-09-08 430 +2022-09-09 834 +2022-09-10 065 +2022-09-11 919 +2022-09-12 766 +2022-09-13 328 +2022-09-14 884 +2022-09-15 128 +2022-09-16 192 +2022-09-17 946 +2022-09-18 922 +2022-09-19 183 +2022-09-20 424 +2022-09-21 032 +2022-09-22 810 +2022-09-23 230 +2022-09-24 290 +2022-09-25 479 +2022-09-26 673 +2022-09-27 730 +2022-09-28 086 +2022-09-29 017 +2022-09-30 374 +2022-10-01 544 +2022-10-02 114 +2022-10-03 187 +2022-10-04 596 +2022-10-05 167 +2022-10-06 638 +2022-10-07 038 +2022-10-08 319 +2022-10-09 405 +2022-10-10 295 +2022-10-11 761 +2022-10-12 249 +2022-10-13 573 +2022-10-14 242 +2022-10-15 084 +2022-10-16 503 +2022-10-17 584 +2022-10-18 905 +2022-10-19 546 +2022-10-20 975 +2022-10-21 110 +2022-10-22 342 +2022-10-23 511 +2022-10-24 851 +2022-10-25 433 +2022-10-26 471 +2022-10-27 669 +2022-10-28 291 +2022-10-29 486 +2022-10-30 119 +2022-10-31 784 +2022-11-01 872 +2022-11-02 985 +2022-11-03 956 +2022-11-04 208 +2022-11-05 779 +2022-11-06 727 +2022-11-07 887 +2022-11-08 275 +2022-11-09 598 +2022-11-10 129 +2022-11-11 987 +2022-11-12 971 +2022-11-13 615 +2022-11-14 782 +2022-11-15 942 +2022-11-16 245 +2022-11-17 246 +2022-11-18 069 +2022-11-19 476 +2022-11-20 002 +2022-11-21 862 +2022-11-22 240 
+2022-11-23 244 +2022-11-24 538 +2022-11-25 507 +2022-11-26 337 +2022-11-27 409 +2022-11-28 007 +2022-11-29 024 +2022-11-30 755 +2022-12-01 452 +2022-12-02 918 +2022-12-03 202 +2022-12-04 455 +2022-12-05 340 +2022-12-06 859 +2022-12-07 347 +2022-12-08 743 +2022-12-09 948 +2022-12-10 717 +2022-12-11 547 +2022-12-12 323 +2022-12-13 048 +2022-12-14 533 +2022-12-15 072 +2022-12-16 947 +2022-12-17 140 +2022-12-18 098 +2022-12-19 301 +2022-12-20 270 +2022-12-21 601 +2022-12-22 759 +2022-12-23 089 +2022-12-24 380 +2022-12-25 819 +2022-12-26 552 +2022-12-27 343 +2022-12-28 776 +2022-12-29 889 +2022-12-30 226 +2022-12-31 990 +2023-01-01 212 +2023-01-02 314 +2023-01-03 560 +2023-01-04 420 +2023-01-05 767 +2023-01-06 746 +2023-01-07 200 +2023-01-08 915 +2023-01-09 595 +2023-01-10 231 +2023-01-11 739 +2023-01-12 451 +2023-01-13 803 +2023-01-14 529 +2023-01-15 131 +2023-01-16 928 +2023-01-17 259 +2023-01-18 618 +2023-01-19 556 +2023-01-20 456 +2023-01-21 447 +2023-01-22 603 +2023-01-23 113 +2023-01-24 980 +2023-01-25 876 +2023-01-26 853 +2023-01-27 418 +2023-01-28 400 +2023-01-29 185 +2023-01-30 704 +2023-01-31 460 +2023-02-01 012 +2023-02-02 481 +2023-02-03 737 +2023-02-04 655 +2023-02-05 504 +2023-02-06 223 +2023-02-07 870 +2023-02-08 056 +2023-02-09 723 +2023-02-10 156 +2023-02-11 744 +2023-02-12 317 +2023-02-13 828 +2023-02-14 016 +2023-02-15 668 +2023-02-16 577 +2023-02-17 311 +2023-02-18 798 +2023-02-19 763 +2023-02-20 756 +2023-02-21 797 +2023-02-22 258 +2023-02-23 986 +2023-02-24 932 +2023-02-25 461 +2023-02-26 608 +2023-02-27 662 +2023-02-28 711 +2023-03-01 394 +2023-03-02 765 +2023-03-03 195 +2023-03-04 892 +2023-03-05 238 +2023-03-06 881 +2023-03-07 044 +2023-03-08 534 +2023-03-09 833 +2023-03-10 729 +2023-03-11 353 +2023-03-12 849 +2023-03-13 250 +2023-03-14 188 +2023-03-15 375 +2023-03-16 660 +2023-03-17 108 +2023-03-18 972 +2023-03-19 182 +2023-03-20 748 +2023-03-21 706 +2023-03-22 549 +2023-03-23 292 +2023-03-24 807 +2023-03-25 097 +2023-03-26 885 +2023-03-27 437 
+2023-03-28 068 +2023-03-29 519 +2023-03-30 271 +2023-03-31 789 +2023-04-01 005 +2023-04-02 663 +2023-04-03 416 +2023-04-04 022 +2023-04-05 345 +2023-04-06 176 +2023-04-07 703 +2023-04-08 883 +2023-04-09 335 +2023-04-10 970 +2023-04-11 952 +2023-04-12 877 +2023-04-13 620 +2023-04-14 583 +2023-04-15 105 +2023-04-16 551 +2023-04-17 423 +2023-04-18 251 +2023-04-19 141 +2023-04-20 029 +2023-04-21 714 +2023-04-22 142 +2023-04-23 150 +2023-04-24 652 +2023-04-25 265 +2023-04-26 241 +2023-04-27 847 +2023-04-28 175 +2023-04-29 324 +2023-04-30 929 +2023-05-01 488 +2023-05-02 562 +2023-05-03 863 +2023-05-04 161 +2023-05-05 617 +2023-05-06 367 +2023-05-07 428 +2023-05-08 310 +2023-05-09 988 +2023-05-10 095 +2023-05-11 307 +2023-05-12 391 +2023-05-13 499 +2023-05-14 908 +2023-05-15 741 +2023-05-16 385 +2023-05-17 649 +2023-05-18 532 +2023-05-19 357 +2023-05-20 039 +2023-05-21 025 +2023-05-22 286 +2023-05-23 413 +2023-05-24 710 +2023-05-25 515 +2023-05-26 642 +2023-05-27 911 +2023-05-28 774 +2023-05-29 712 +2023-05-30 033 +2023-05-31 346 +2023-06-01 253 +2023-06-02 647 +2023-06-03 331 +2023-06-04 530 +2023-06-05 211 +2023-06-06 651 +2023-06-07 610 +2023-06-08 322 +2023-06-09 151 +2023-06-10 548 +2023-06-11 554 +2023-06-12 392 +2023-06-13 256 +2023-06-14 308 +2023-06-15 296 +2023-06-16 389 +2023-06-17 815 +2023-06-18 454 +2023-06-19 229 +2023-06-20 550 +2023-06-21 309 +2023-06-22 664 +2023-06-23 582 +2023-06-24 754 +2023-06-25 722 +2023-06-26 597 +2023-06-27 160 +2023-06-28 436 +2023-06-29 827 +2023-06-30 580 +2023-07-01 277 +2023-07-02 028 +2023-07-03 802 +2023-07-04 387 +2023-07-05 692 +2023-07-06 313 +2023-07-07 397 +2023-07-08 731 +2023-07-09 272 +2023-07-10 950 +2023-07-11 432 +2023-07-12 643 +2023-07-13 015 +2023-07-14 837 +2023-07-15 589 +2023-07-16 568 +2023-07-17 674 +2023-07-18 293 +2023-07-19 943 +2023-07-20 688 +2023-07-21 567 +2023-07-22 852 +2023-07-23 143 +2023-07-24 613 +2023-07-25 014 +2023-07-26 144 +2023-07-27 978 +2023-07-28 682 +2023-07-29 120 +2023-07-30 093 
+2023-07-31 149 +2023-08-01 675 +2023-08-02 349 +2023-08-03 559 +2023-08-04 907 +2023-08-05 122 +2023-08-06 448 +2023-08-07 482 +2023-08-08 422 +2023-08-09 772 +2023-08-10 073 +2023-08-11 566 +2023-08-12 262 +2023-08-13 906 +2023-08-14 955 +2023-08-15 326 +2023-08-16 204 +2023-08-17 936 +2023-08-18 449 +2023-08-19 637 +2023-08-20 441 +2023-08-21 360 +2023-08-22 376 +2023-08-23 811 +2023-08-24 964 +2023-08-25 198 +2023-08-26 165 +2023-08-27 843 +2023-08-28 414 +2023-08-29 096 +2023-08-30 775 +2023-08-31 627 +2023-09-01 894 +2023-09-02 984 +2023-09-03 516 +2023-09-04 366 +2023-09-05 306 +2023-09-06 611 +2023-09-07 808 +2023-09-08 171 +2023-09-09 078 +2023-09-10 657 +2023-09-11 030 +2023-09-12 458 +2023-09-13 752 +2023-09-14 094 +2023-09-15 867 +2023-09-16 967 +2023-09-17 439 +2023-09-18 850 +2023-09-19 646 +2023-09-20 159 +2023-09-21 879 +2023-09-22 733 +2023-09-23 484 +2023-09-24 895 +2023-09-25 888 +2023-09-26 924 +2023-09-27 338 +2023-09-28 383 +2023-09-29 402 +2023-09-30 047 +2023-10-01 545 +2023-10-02 690 +2023-10-03 916 +2023-10-04 732 +2023-10-05 133 +2023-10-06 805 +2023-10-07 599 +2023-10-08 483 +2023-10-09 075 +2023-10-10 912 +2023-10-11 794 +2023-10-12 247 +2023-10-13 790 +2023-10-14 591 +2023-10-15 157 +2023-10-16 145 +2023-10-17 472 +2023-10-18 553 +2023-10-19 873 +2023-10-20 983 +2023-10-21 864 +2023-10-22 294 +2023-10-23 074 +2023-10-24 268 +2023-10-25 634 +2023-10-26 630 +2023-10-27 130 +2023-10-28 235 +2023-10-29 565 +2023-10-30 285 +2023-10-31 934 +2023-11-01 791 +2023-11-02 136 +2023-11-03 777 +2023-11-04 287 +2023-11-05 283 +2023-11-06 059 +2023-11-07 071 +2023-11-08 940 +2023-11-09 004 +2023-11-10 702 +2023-11-11 368 +2023-11-12 512 +2023-11-13 624 +2023-11-14 421 +2023-11-15 118 +2023-11-16 680 +2023-11-17 273 +2023-11-18 216 +2023-11-19 957 +2023-11-20 254 +2023-11-21 115 +2023-11-22 762 +2023-11-23 052 +2023-11-24 944 +2023-11-25 882 +2023-11-26 814 +2023-11-27 210 +2023-11-28 926 +2023-11-29 197 +2023-11-30 939 +2023-12-01 369 +2023-12-02 931 
+2023-12-03 600 +2023-12-04 498 +2023-12-05 686 +2023-12-06 478 +2023-12-07 180 +2023-12-08 485 +2023-12-09 303 +2023-12-10 633 +2023-12-11 683 +2023-12-12 514 +2023-12-13 013 +2023-12-14 868 +2023-12-15 848 +2023-12-16 614 +2023-12-17 201 +2023-12-18 536 +2023-12-19 681 +2023-12-20 866 +2023-12-21 671 +2023-12-22 066 +2023-12-23 186 +2023-12-24 233 +2023-12-25 104 +2023-12-26 404 +2023-12-27 773 +2023-12-28 281 +2023-12-29 099 +2023-12-30 570 +2023-12-31 632 +2024-01-01 152 +2024-01-02 146 +2024-01-03 371 +2024-01-04 969 +2024-01-05 330 +2024-01-06 992 +2024-01-07 740 +2024-01-08 316 +2024-01-09 968 +2024-01-10 757 +2024-01-11 058 +2024-01-12 429 +2024-01-13 178 +2024-01-14 787 +2024-01-15 252 +2024-01-16 812 +2024-01-17 106 +2024-01-18 325 +2024-01-19 631 +2024-01-20 576 +2024-01-21 147 +2024-01-22 132 +2024-01-23 434 +2024-01-24 490 +2024-01-25 220 +2024-01-26 778 +2024-01-27 164 +2024-01-28 989 +2024-01-29 661 +2024-01-30 135 +2024-01-31 645 +2024-02-01 036 +2024-02-02 169 +2024-02-03 665 +2024-02-04 037 +2024-02-05 590 +2024-02-06 914 +2024-02-07 836 +2024-02-08 172 +2024-02-09 770 +2024-02-10 049 +2024-02-11 581 +2024-02-12 067 +2024-02-13 639 +2024-02-14 386 +2024-02-15 903 +2024-02-16 398 +2024-02-17 232 +2024-02-18 966 +2024-02-19 525 +2024-02-20 393 +2024-02-21 261 +2024-02-22 728 +2024-02-23 101 +2024-02-24 557 +2024-02-25 896 +2024-02-26 057 +2024-02-27 962 +2024-02-28 406 +2024-02-29 399 +2024-03-01 998 +2024-03-02 352 +2024-03-03 509 +2024-03-04 500 +2024-03-05 378 +2024-03-06 365 +2024-03-07 520 +2024-03-08 496 +2024-03-09 354 +2024-03-10 457 +2024-03-11 205 +2024-03-12 605 +2024-03-13 705 +2024-03-14 010 +2024-03-15 897 +2024-03-16 588 +2024-03-17 055 +2024-03-18 749 +2024-03-19 459 +2024-03-20 363 +2024-03-21 745 +2024-03-22 742 +2024-03-23 656 +2024-03-24 818 +2024-03-25 684 +2024-03-26 999 +2024-03-27 302 +2024-03-28 612 +2024-03-29 841 +2024-03-30 041 +2024-03-31 880 +2024-04-01 042 +2024-04-02 464 +2024-04-03 965 +2024-04-04 846 +2024-04-05 203 
+2024-04-06 856 +2024-04-07 693 +2024-04-08 190 +2024-04-09 505 +2024-04-10 103 +2024-04-11 842 +2024-04-12 691 +2024-04-13 154 +2024-04-14 239 +2024-04-15 351 +2024-04-16 644 +2024-04-17 051 +2024-04-18 738 +2024-04-19 116 +2024-04-20 206 +2024-04-21 379 +2024-04-22 278 +2024-04-23 720 +2024-04-24 318 +2024-04-25 699 +2024-04-26 900 +2024-04-27 234 +2024-04-28 109 +2024-04-29 510 +2024-04-30 043 +2024-05-01 237 +2024-05-02 289 +2024-05-03 224 +2024-05-04 063 +2024-05-05 747 +2024-05-06 769 +2024-05-07 685 +2024-05-08 470 +2024-05-09 824 +2024-05-10 177 +2024-05-11 937 +2024-05-12 218 +2024-05-13 491 +2024-05-14 949 +2024-05-15 082 +2024-05-16 951 +2024-05-17 260 +2024-05-18 621 +2024-05-19 973 +2024-05-20 467 +2024-05-21 874 +2024-05-22 687 +2024-05-23 941 +2024-05-24 121 +2024-05-25 336 +2024-05-26 531 +2024-05-27 401 +2024-05-28 781 +2024-05-29 023 +2024-05-30 079 +2024-05-31 981 +2024-06-01 417 +2024-06-02 721 +2024-06-03 060 +2024-06-04 181 +2024-06-05 902 +2024-06-06 838 +2024-06-07 001 +2024-06-08 350 +2024-06-09 327 +2024-06-10 474 +2024-06-11 248 +2024-06-12 910 +2024-06-13 372 +2024-06-14 125 +2024-06-15 124 +2024-06-16 963 +2024-06-17 750 +2024-06-18 558 +2024-06-19 977 +2024-06-20 996 +2024-06-21 602 +2024-06-22 228 +2024-06-23 373 +2024-06-24 117 +2024-06-25 339 +2024-06-26 312 +2024-06-27 321 +2024-06-28 835 +2024-06-29 592 +2024-06-30 854 +2024-07-01 492 +2024-07-02 654 +2024-07-03 821 +2024-07-04 214 +2024-07-05 959 +2024-07-06 904 +2024-07-07 227 +2024-07-08 045 +2024-07-09 139 +2024-07-10 502 +2024-07-11 031 +2024-07-12 487 +2024-07-13 561 +2024-07-14 377 +2024-07-15 593 +2024-07-16 344 +2024-07-17 563 +2024-07-18 266 +2024-07-19 076 +2024-07-20 425 +2024-07-21 123 +2024-07-22 522 +2024-07-23 564 +2024-07-24 506 +2024-07-25 040 +2024-07-26 526 +2024-07-27 034 +2024-07-28 021 +2024-07-29 207 +2024-07-30 768 +2024-07-31 179 +2024-08-01 426 +2024-08-02 236 +2024-08-03 809 +2024-08-04 199 +2024-08-05 933 +2024-08-06 020 +2024-08-07 909 +2024-08-08 995 
+2024-08-09 736 +2024-08-10 640 +2024-08-11 390 +2024-08-12 011 +2024-08-13 542 +2024-08-14 299 +2024-08-15 446 +2024-08-16 893 +2024-08-17 713 +2024-08-18 501 +2024-08-19 410 +2024-08-20 282 +2024-08-21 468 +2024-08-22 362 +2024-08-23 917 +2024-08-24 899 +2024-08-25 000 +2024-08-26 997 +2024-08-27 783 +2024-08-28 407 +2024-08-29 708 +2024-08-30 493 +2024-08-31 019 +2024-09-01 869 +2024-09-02 898 +2024-09-03 062 +2024-09-04 845 +2024-09-05 760 +2024-09-06 604 +2024-09-07 923 +2024-09-08 162 +2024-09-09 127 +2024-09-10 090 +2024-09-11 607 +2024-09-12 626 +2024-09-13 865 +2024-09-14 215 +2024-09-15 753 +2024-09-16 616 +2024-09-17 822 +2024-09-18 945 +2024-09-19 858 +2024-09-20 698 +2024-09-21 771 +2024-09-22 085 +2024-09-23 445 +2024-09-24 609 +2024-09-25 081 +2024-09-26 829 +2024-09-27 300 +2024-09-28 193 +2024-09-29 960 +2024-09-30 092 +2024-10-01 541 +2024-10-02 356 +2024-10-03 659 +2024-10-04 697 +2024-10-05 796 +2024-10-06 107 +2024-10-07 403 +2024-10-08 719 +2024-10-09 696 +2024-10-10 677 +2024-10-11 878 +2024-10-12 284 +2024-10-13 388 +2024-10-14 927 +2024-10-15 569 +2024-10-16 724 +2024-10-17 930 +2024-10-18 054 +2024-10-19 935 +2024-10-20 361 +2024-10-21 221 +2024-10-22 465 +2024-10-23 622 +2024-10-24 170 +2024-10-25 839 +2024-10-26 415 +2024-10-27 427 +2024-10-28 077 +2024-10-29 370 +2024-10-30 587 +2024-10-31 925 +2024-11-01 134 +2024-11-02 315 +2024-11-03 571 +2024-11-04 735 +2024-11-05 191 +2024-11-06 408 +2024-11-07 006 +2024-11-08 166 +2024-11-09 579 +2024-11-10 764 +2024-11-11 543 +2024-11-12 823 +2024-11-13 102 +2024-11-14 800 +2024-11-15 857 +2024-11-16 168 +2024-11-17 473 +2024-11-18 348 +2024-11-19 890 +2024-11-20 209 +2024-11-21 920 +2024-11-22 064 +2024-11-23 578 +2024-11-24 263 +2024-11-25 606 +2024-11-26 009 +2024-11-27 938 +2024-11-28 524 +2024-11-29 126 +2024-11-30 061 +2024-12-01 799 +2024-12-02 463 +2024-12-03 830 +2024-12-04 689 +2024-12-05 003 +2024-12-06 860 +2024-12-07 155 +2024-12-08 715 +2024-12-09 523 +2024-12-10 678 +2024-12-11 384 
def number_from_update(u):
    """Run the strikebot update parser over update payload `u` and return the
    parsed count (or None when the update carries no number)."""
    parsed = parse_update(u, None, '')
    return parsed.number

def format_n(n):
    """Render an integer with comma thousands separators."""
    return f'{n:,}'

def is_kget(ts, n):
    """Return True when `n` is a positive exact multiple of 1000 (a "k-get").

    `ts` is accepted for signature parity with is_notd but is not used.
    """
    if n <= 0:
        return False
    return n % 1000 == 0

def is_notd(ts, n):
    """Return True when `n` hits the Number of the Day for `ts`'s date.

    The last three digits of a positive `n` must equal the scheduled NOTD
    (dates interpreted in the notd_tz zone; unlisted dates never match).
    """
    if n <= 0:
        return False
    day = ts.astimezone(notd_tz).date()
    if day not in notd_list:
        return False
    return n % 1000 == int(notd_list[day])
def uuid_from_regular_line(event_id, l):
    """Parse one sidebar bullet line.

    Returns the update's UUID when `l` is a regular count line
    ("* [1,234](/live/<event>/updates/<uuid>) - /u/<user>") for this
    event_id; returns None for non-matching lines or other events.
    """
    match = re.match(r'\* \[[1-9]\d{0,2}(?:,\d{3})*\]\(/live/([a-z0-9]+)/updates/([0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})\) - /u/[0-9a-zA-Z_-]{3,20}', l)
    if not match or match.group(1) != event_id:
        return None
    return UUID(match.group(2))

def find_parse_section(text, section_name, **kwargs):
    """Locate the sidebar section bracketed by start/end marker comments.

    Returns (start_marker_index, slice_of_section_body, LineProcessor) with
    the body lines fed to the processor newest-first, or None when either
    marker is absent.
    """
    marker1 = '[](#sidebar "start {}")\n\n'.format(section_name)
    marker2 = '[](#sidebar "end {}")'.format(section_name)

    # BUG FIX: str.index() raises ValueError rather than returning -1, so the
    # original `ix0 == -1` check was dead code and a missing start marker
    # crashed the run. Use str.find() and guard the end marker the same way.
    ix0 = text.find(marker1)
    if ix0 == -1:
        return None
    ix1 = ix0 + len(marker1)
    ix2 = text.find(marker2, ix1)
    if ix2 == -1:
        return None

    raw_lines = [] if ix1 == ix2 else text[ix1:ix2].split('\n')
    line_processor = LineProcessor(reversed(raw_lines), **kwargs)
    return (ix0, slice(ix1, ix2), line_processor)

def notd_banner_patch(text, notd_marker1, now):
    """Compute a (slice, replacement) patch for the banner line immediately
    above the NOTD section's start marker (index `notd_marker1` in `text`).

    When the marker line is the first line, the returned empty slice inserts
    a new banner instead of replacing one.
    """
    banner_line_end = text.rfind('\n', 0, notd_marker1)
    if banner_line_end == -1:  # marker line is first line; we'll insert
        banner_line_end = 0
    banner_line_begin = text.rfind('\n', 0, banner_line_end) + 1

    # banner must stay a single line within the patched slice
    new_text = notd_banner(now).replace('\n', '')
    return (slice(banner_line_begin, banner_line_end), new_text)
    def delete_between(self, time1, time2):
        """Delete regular lines whose UUIDv1 timestamp lies strictly between
        time1 and time2 — their updates vanished from the feed (deleted)."""
        while not self.end():
            if self.valid():
                if time1 < self.time() < time2:
                    self.delete()
                    continue
                elif self.time() >= time2:
                    break
            self.next()

    # advance through lines matching target_uuid, skipping invalid ones.
    # delete matching lines if delete=True.
    # return whether there were matching lines.
    def take(self, target_uuid, delete_matching):
        found_matching = False
        while not self.end():
            if self.valid():
                if self.time() > target_uuid.time:
                    # lines are kept time-ordered; past the target means no more matches
                    break
                if self.uuid() == target_uuid:
                    found_matching = True
                    if delete_matching:
                        self.delete()
                        continue
            self.next()
        return found_matching

    # Advance the i pointer. If advancing through a regular, valid, novel line then also
    # advance the j pointer afterward.
    def next(self):
        advance_j = False
        if self.valid():
            if self.i_prev_uuid != self.uuid():  # novel
                self.i_prev_uuid = self.uuid()
                advance_j = True
        self.i += 1
        if advance_j:
            # j trails i by `retain` novel ids (j starts at -retain in __init__),
            # so only the newest `retain` count lines survive — TODO confirm
            self.next_j()

    # Advance the j pointer by deleting one or more regular valid lines of a single id
    def next_j(self):
        if self.j < 0:  # j behind start of list - no actual lines, nothing to delete
            self.j += 1
            return
        assert self.j < self.i  # we should not advance j without a line to look at
        if self.j == 0:  # starting out: skip over initial non-regular lines
            while self.j < self.i and self.lines[self.j][1] is None:
                self.j += 1
            assert self.j < self.i  # we should not advance j without a regular line to look at
        cur_uuid = self.lines[self.j][1]
        assert cur_uuid is not None
        while self.j < self.i:  # don't let j advance beyond i (proxy for end of list)
            if (self.lines[self.j][1] is None  # non-regular
                    or self.lines[self.j][1].time > cur_uuid.time):  # invalid
                self.j += 1  # skip over
            else:
                if self.lines[self.j][1] != cur_uuid:  # novel, stop
                    break
                # line at self.j is valid, regular, not novel: delete
                del self.lines[self.j]
                self.i -= 1
                self.dirty = True
def insert(self, u, n): + str_ = '* [{}](/live/{}/updates/{}) - /u/{}'.format( + format_n(n), self.event_id, u['id'], u['author']) + self.lines.insert(self.i, (str_, UUID(u['id']))) + self.next() + self.dirty = True + + def delete(self): + assert self.i < len(self.lines) + del self.lines[self.i] + self.dirty = True + + def end(self): + return self.i == len(self.lines) + + def uuid(self): + assert not self.end() + return self.lines[self.i][1] + + def time(self): + assert not self.end() + return self.uuid().time + + def valid(self): + assert not self.end() + if self.uuid() is None: + return False + return self.i_prev_uuid is None or self.time() >= self.i_prev_uuid.time + + def text(self): + return '\n'.join((l[0] for l in reversed(self.lines))) diff --git a/sidebot/sidebot/strikebot_updates.py b/sidebot/sidebot/strikebot_updates.py new file mode 100644 index 0000000..6e072db --- /dev/null +++ b/sidebot/sidebot/strikebot_updates.py @@ -0,0 +1,218 @@ +# copied from strikebot/updates.py + +from __future__ import annotations +from dataclasses import dataclass +from enum import Enum +from typing import Optional +import re + +from bs4 import BeautifulSoup + + +Command = Enum("Command", ["RESET", "REPORT"]) + + +@dataclass +class ParsedUpdate: + number: Optional[int] + command: Optional[Command] + count_attempt: bool # either well-formed or typo + deletable: bool + + +def _parse_command(line: str, bot_user: str) -> Optional[Command]: + if line.lower() == f"/u/{bot_user} reset".lower(): + return Command.RESET + elif line.lower() in ["sidebar count", "current count"]: + return Command.REPORT + else: + return None + + +def parse_update(payload_data: dict, curr_count: Optional[int], bot_user: str) -> ParsedUpdate: + # curr_count is the next number up, one more than the last count + + NEW_LINE = object() + SPACE = object() + + # flatten the update content to plain text + tree = BeautifulSoup(payload_data["body_html"], "html.parser") + worklist = tree.contents + out = [[]] + 
while worklist: + el = worklist.pop() + if isinstance(el, str): + out[-1].append(el) + elif el is SPACE: + out[-1].append(el) + elif el is NEW_LINE or el.name == "br" or el.name == "hr": + if out[-1]: + out.append([]) + elif el.name in ["em", "strong", "del", "span", "sup", "code", "a", "th", "td"]: + worklist.extend(reversed(el.contents)) + elif el.name in ["ul", "ol", "table", "thead", "tbody"]: + worklist.extend(reversed(el.contents)) + elif el.name in ["li", "p", "div", "blockquote"] or re.match(r"h[1-6]$", el.name): + worklist.append(NEW_LINE) + worklist.extend(reversed(el.contents)) + worklist.append(NEW_LINE) + elif el.name == "pre": + worklist.append(NEW_LINE) + worklist.extend([l] for l in reversed(el.text.splitlines())) + worklist.append(NEW_LINE) + elif el.name == "tr": + worklist.append(NEW_LINE) + for (i, cell) in enumerate(reversed(el.contents)): + worklist.append(cell) + if i != len(el.contents) - 1: + worklist.append(SPACE) + worklist.append(NEW_LINE) + else: + raise RuntimeError(f"can't parse tag {el.name}") + + tmp_lines = ( + "".join(" " if part is SPACE else part for part in parts).strip() + for parts in out + ) + pre_strip_lines = list(filter(None, tmp_lines)) + + # normalize whitespace according to HTML rendering rules + # https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace#explanation + stripped_lines = [ + re.sub(" +", " ", l.replace("\t", " ").replace("\n", " ")).strip(" ") + for l in pre_strip_lines + ] + + return _parse_from_lines(stripped_lines, curr_count, bot_user) + + +def _parse_from_lines(lines: list[str], curr_count: Optional[int], bot_user: str) -> ParsedUpdate: + command = next( + filter(None, (_parse_command(l, bot_user) for l in lines)), + None + ) + if lines: + # look for groups of digits (as many as possible) separated by a uniform separator from the valid set + first = lines[0] + match = re.match( + "(?Pv)?(?P-)?(?P\\d+((?P[,. 
\u2009]|, )\\d+((?P=sep)\\d+)*)?)", + first, + re.ASCII, # only recognize ASCII digits + ) + if match: + raw_digits = match["num"] + sep = match["sep"] + post = first[match.end() :] + + zeros = False + while len(raw_digits) > 1 and raw_digits[0] == "0": + zeros = True + raw_digits = raw_digits.removeprefix("0").removeprefix(sep or "") + + parts = raw_digits.split(sep) if sep else [raw_digits] + lone = len(lines) == 1 and (not post or post.isspace()) + typo = False + if lone: + all_parts_valid = ( + sep is None + or ( + 1 <= len(parts[0]) <= 3 + and all(len(p) == 3 for p in parts[1:]) + ) + ) + if match["v"] and len(parts) == 1 and len(parts[0]) <= 2: + # failed paste of leading digits + typo = True + elif match["v"] and all_parts_valid: + # v followed by count + typo = True + elif curr_count is not None and abs(curr_count) >= 100 and bool(match["neg"]) == (curr_count < 0): + goal_parts = _separate(str(abs(curr_count))) + partials = [ + goal_parts[: -1] + [goal_parts[-1][: -1]], # missing last digit + goal_parts[: -1] + [goal_parts[-1][: -2]], # missing last two digits + goal_parts[: -1] + [goal_parts[-1][: -2] + goal_parts[-1][-1]], # missing second-last digit + ] + if parts in partials: + # missing any of last two digits + typo = True + elif parts in [p[: -1] + [p[-1] + goal_parts[0]] + goal_parts[1 :] for p in partials]: + # double paste + typo = True + + if match["v"] or zeros or typo or (parts == ["0"] and match["neg"]): + number = None + count_attempt = True + deletable = lone + else: + if curr_count is not None and sep and sep.isspace(): + # Presume that the intended count consists of as many valid digit groups as necessary to match the + # number of digits in the expected count, if possible. 
+ digit_count = len(str(abs(curr_count))) + use_parts = [] + accum = 0 + for (i, part) in enumerate(parts): + part_valid = len(part) <= 3 if i == 0 else len(part) == 3 + if part_valid and accum < digit_count: + use_parts.append(part) + accum += len(part) + else: + break + + # could still be a no-separator count with some extra digit groups on the same line + if not use_parts: + use_parts = [parts[0]] + + lone = lone and len(use_parts) == len(parts) + else: + # current count is unknown or no separator was used + use_parts = parts + + digits = "".join(use_parts) + number = -int(digits) if match["neg"] else int(digits) + special = ( + curr_count is not None + and abs(number - curr_count) <= 25 + and _is_special_number(number) + ) + deletable = lone and not special + if len(use_parts) == len(parts) and post and not post[0].isspace(): + count_attempt = curr_count is not None and abs(number - curr_count) <= 25 + number = None + else: + count_attempt = True + else: + # no count attempt found + number = None + count_attempt = False + deletable = False + else: + # no lines in update + number = None + count_attempt = False + deletable = True + + return ParsedUpdate( + number = number, + command = command, + count_attempt = count_attempt, + deletable = deletable, + ) + + +def _separate(digits: str) -> list[str]: + mod = len(digits) % 3 + out = [] + if mod: + out.append(digits[: mod]) + out.extend(digits[i : i + 3] for i in range(mod, len(digits), 3)) + return out + + +def _is_special_number(num: int) -> bool: + num_str = str(num) + return bool( + num % 1000 in [0, 1, 333, 999] + or (num > 10_000_000 and "".join(reversed(num_str)) == num_str) + or re.match(r"(.+)\1+$", num_str) # repeated sequence + ) -- 2.30.2