From: Jakob Cornell Date: Sun, 6 Nov 2022 16:33:30 +0000 (-0600) Subject: Autojoin: load all search results to determine allowed threads X-Git-Tag: strikebot-0.0.7~2 X-Git-Url: https://jcornell.net/gitweb/gitweb.cgi?a=commitdiff_plain;h=98e49e2b1d2109eb91477b36bfb4f93cd9c00265;p=counting.git Autojoin: load all search results to determine allowed threads --- diff --git a/joinbot/README.md b/joinbot/README.md index ba16273..4322653 100644 --- a/joinbot/README.md +++ b/joinbot/README.md @@ -23,7 +23,6 @@ flow: 11. auto-refreshing /ticket starts redirecting back to the live event # Roadmap/wishlist -- allowed events: load more than 1 page of search results - allowed events: search linkpost self text and "thread directory" wiki page for links, not just linkpost links - configurable (wiki page) allow/deny list for events diff --git a/joinbot/joinbot/background.py b/joinbot/joinbot/background.py index abfe534..edf2b20 100644 --- a/joinbot/joinbot/background.py +++ b/joinbot/joinbot/background.py @@ -10,6 +10,7 @@ # roadmap # - for PM, allowed threads, and modmail sync, create a lightweight version to run frequently to handle new events quickly--distinct from the more expensive "full sync" that is implemented here. 
def allowed_threads():
	"""Return the set of live-thread "flakes" (thread IDs) linked from r/livecounting.

	Pages through the subreddit search results (100 per page) following the
	Listing pagination cursor until exhausted, extracting a flake from each
	non-self link post via flake_from_url.

	Returns:
		set[str]: all flakes found (falsy extraction results are dropped).
	"""
	flakes = []
	params = {
		# NB: spaces, not literal '+': urlencode quote_plus-encodes spaces as
		# '+', matching the original query string `url%3Alive+site%3Areddit.com+self%3Ano`.
		# A literal '+' here would be escaped to '%2B' and change the search.
		"q": "url:live site:reddit.com self:no",
		"restrict_sr": "on",
		"include_over_18": "on",
		"sort": "new",
		"t": "all",
		"limit": "100",
	}
	while True:
		req = urllib.request.Request(
			"https://oauth.reddit.com/r/livecounting/search?" + urlencode(params),
			method = "GET",
			headers = {
				"Authorization": "Bearer {}".format(access_token),
				"User-Agent": "autojoin/0.1.0",
			},
		)
		with urllib.request.urlopen(req) as resp:
			data = json.load(resp)
		# Reddit wraps results in a Listing: children and the pagination
		# cursor both live under the top-level "data" key.
		listing = data["data"]
		flakes.extend(
			flake_from_url(thing["data"]["url"])
			for thing in listing["children"]
			if not thing["data"]["is_self"]
		)
		# `after` is inside the Listing data, not the top-level object;
		# None marks the final page.
		if listing["after"] is None:
			return set(filter(None, flakes))
		else:
			params["after"] = listing["after"]