Autojoin: load all search results to determine allowed threads
author Jakob Cornell <jakob+gpg@jcornell.net>
Sun, 6 Nov 2022 16:33:30 +0000 (10:33 -0600)
committer Jakob Cornell <jakob+gpg@jcornell.net>
Sun, 6 Nov 2022 16:44:56 +0000 (10:44 -0600)
joinbot/README.md
joinbot/joinbot/background.py

index ba1627300917b4c664d540afe729875b623f8596..43226537365d007de0b5e2c39b4c71549f81f259 100644 (file)
@@ -23,7 +23,6 @@ flow:
 11. auto-refreshing /ticket starts redirecting back to the live event
 
 # Roadmap/wishlist
-- allowed events: load more than 1 page of search results
 - allowed events: search linkpost self text and "thread directory" wiki page for links, not just linkpost links
 - configurable (wiki page) allow/deny list for events
 
index abfe534e5490cc45e15306313f848ac4b051cd87..edf2b20c64c02c1d899eda9debaff63808112c3b 100644 (file)
@@ -10,6 +10,7 @@
 # roadmap
 # - for PM, allowed threads, and modmail sync, create a lightweight version to run frequently to handle new events quickly--distinct from the more expensive "full sync" that is implemented here.
 
+from urllib.parse import urlencode, urlparse, urlunparse
 import urllib.request
 import urllib.parse
 
@@ -122,12 +123,43 @@ def main():
                return result and result.group(1)
 
        def allowed_threads():
-               req = urllib.request.Request('https://oauth.reddit.com/r/livecounting/search?q=url%3Alive+site%3Areddit.com+self%3Ano&restrict_sr=on&include_over_18=on&sort=new&t=all&limit=100', method='GET')
-               req.add_header('Authorization', 'Bearer {}'.format(access_token))
-               req.add_header('User-Agent', 'autojoin/0.1.0')
-               res = json.load(urllib.request.urlopen(req))
-               flakes = (flake_from_url(thing['data']['url']) for thing in res['data']['children'] if thing['data']['is_self'] is False)
-               return set((f for f in flakes if f))
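+               # Accumulate candidate live thread IDs ("flakes") across all pages of search results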
+               flakes = []
+               params = {
+                       "q": "url:live site:reddit.com self:no",  # spaces here; urlencode() quotes them as "+"
+                       "restrict_sr": "on",
+                       "include_over_18": "on",
+                       "sort": "new",
+                       "t": "all",
+                       "limit": "100",
+               }
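+               # Fetch pages until the listing is exhausted; Reddit returns at most 100 results per request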
+               while True:
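+                       # Rebuild the request URL each iteration so the "after" cursor (set at the end of the loop) is included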
+                       req = urllib.request.Request(
+                               urlunparse(
+                                       urlparse("https://oauth.reddit.com/r/livecounting/search")
+                                       ._replace(query = urlencode(params))
+                               ),
+                               method = "GET",
+                               headers = {
+                                       "Authorization": "Bearer {}".format(access_token),
+                                       "User-Agent": "autojoin/0.1.0",
+                               }
+                       )
+                       with urllib.request.urlopen(req) as resp:
+                               data = json.load(resp)
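+                       # Keep link posts only; flake_from_url() returns None for URLs that don't match, filtered out at return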
+                       flakes.extend(
+                               flake_from_url(thing["data"]["url"])
+                               for thing in data["data"]["children"]
+                               if not thing["data"]["is_self"]
+                       )
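+                       # "after" is Reddit's pagination cursor; None marks the last page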
+                       if data["data"]["after"] is None:
+                               return set(filter(None, flakes))
+                       else:
+                               params["after"] = data["data"]["after"]
 
        cr.execute("BEGIN")
        #cr.execute("DELETE FROM live_autojoin_allowed_event WHERE service_name = %s", (service_name,))