jcornell.net Git - ntbd-parcels.git/commitdiff
Finish amenity extraction tool
author Jakob Cornell <jakob+gpg@jcornell.net>
Sun, 24 May 2026 04:52:24 +0000 (23:52 -0500)
committer Jakob Cornell <jakob+gpg@jcornell.net>
Sun, 24 May 2026 04:52:24 +0000 (23:52 -0500)
extract_amenities.py

index 3502de1ff2a3f18b100a337869762305182ed737..4a54d9731609903caf27dc6a8aa9f134f68e9573 100644 (file)
@@ -7,18 +7,101 @@ in the output.
 
 from collections import defaultdict
 from decimal import Decimal
+from enum import Enum
 from xml.etree import ElementTree
 from sys import stdin
 from typing import Optional
+from warnings import warn
 import json
+import re
 
 
-def classify(node_or_way: ElementTree.Element) -> Optional[str]:
-    # TODO: finish this
-    if node_or_way.find("tag[@k='shop'][@v='supermarket']") is not None:
-        return "grocery"
-    else:
-        return None
+class AmenityKind(Enum):  # categories that classify() can return; each value is a human-readable label
+    GROCERY = "grocery"
+    COFFEE = "coffee shop"
+    BREWERY = "brewery"
+    WINE_BAR = "wine bar"
+    PIZZA = "pizza"
+    BAKERY = "bakery"
+    HARDWARE = "hardware store"
+    BOOK_STORE = "book store"
+    BARBER = "barber/salon"  # also returned for shop=hairdresser
+    POST_OFFICE = "post office"
+    LIBRARY = "library"
+    PHARMACY = "pharmacy"
+    DENTIST = "dentist"
+    DOCTOR = "doctor"  # also returned for amenity=clinic
+    MARKET = "farmer's market"  # returned for amenity=marketplace
+    GARDEN = "community garden"
+    EMERGENCY = "emergency department"
+    ELEMENTARY_SCHOOL = "elementary school"
+    HIGH_SCHOOL = "high school"
+    PLAYGROUND = "playground"
+
+
+def classify(node_or_way: ElementTree.Element) -> Optional[AmenityKind]:  # map an element's <tag k=... v=...> children to an AmenityKind; rules run top to bottom and the first match wins
+    if (shop_tag := node_or_way.find("tag[@k='shop']")) is not None:  # shop=* is consulted first
+        match shop_tag.attrib["v"]:  # no default case: unlisted shop values fall through to the later checks
+            case "supermarket" | "greengrocer":
+                return AmenityKind.GROCERY
+            case "coffee":
+                return AmenityKind.COFFEE
+            case "bakery" | "pastry":
+                return AmenityKind.BAKERY
+            case "hardware" | "doityourself":
+                return AmenityKind.HARDWARE
+            case "books":
+                return AmenityKind.BOOK_STORE
+            case "hairdresser":
+                return AmenityKind.BARBER
+    if (amenity_tag := node_or_way.find("tag[@k='amenity']")) is not None:  # then amenity=*
+        match amenity_tag.attrib["v"]:  # again no default case: unlisted amenity values fall through
+            case "cafe":
+                return AmenityKind.COFFEE
+            case "post_office":
+                return AmenityKind.POST_OFFICE
+            case "library":
+                return AmenityKind.LIBRARY
+            case "pharmacy":
+                return AmenityKind.PHARMACY
+            case "dentist":
+                return AmenityKind.DENTIST
+            case "doctors" | "clinic":
+                return AmenityKind.DOCTOR
+            case "marketplace":
+                return AmenityKind.MARKET
+    if (node_or_way.find("tag[@k='craft'][@v='brewery']") is not None  # remaining rules each test one exact key/value pair (or a small combination)
+        or node_or_way.find("tag[@k='microbrewery'][@v='yes']") is not None):
+        return AmenityKind.BREWERY
+    if node_or_way.find("tag[@k='drink:wine'][@v='yes']") is not None:
+        return AmenityKind.WINE_BAR
+    if node_or_way.find("tag[@k='cuisine'][@v='pizza']") is not None:
+        return AmenityKind.PIZZA
+    if node_or_way.find("tag[@k='hairdresser'][@v='barber']") is not None:
+        return AmenityKind.BARBER
+    if node_or_way.find("tag[@k='healthcare'][@v='dentist']") is not None:
+        return AmenityKind.DENTIST
+    if node_or_way.find("tag[@k='healthcare'][@v='doctor']") is not None:
+        return AmenityKind.DOCTOR
+    if (node_or_way.find("tag[@k='landuse'][@v='allotments']") is not None
+        or node_or_way.find("tag[@k='garden:type'][@v='community']") is not None):
+        return AmenityKind.GARDEN
+    if (node_or_way.find("tag[@v='hospital']") is not None  # NOTE(review): no @k filter, so a tag with ANY key and value 'hospital' matches - confirm this is intended
+        and node_or_way.find("tag[@k='emergency'][@v='yes']") is not None):  # both conditions are required
+        return AmenityKind.EMERGENCY
+    if (node_or_way.find("tag[@k='amenity'][@v='school']") is not None
+        or node_or_way.find("tag[@k='building'][@v='school']") is not None):
+        if (name_tag := node_or_way.find("tag[@k='name']")) is not None:  # school level is inferred from the name; a school without a name tag is not classified here
+            school_name = name_tag.attrib["v"].lower()  # lower-cased so the patterns below match regardless of case
+            if re.search(r"\belementary\b", school_name):
+                return AmenityKind.ELEMENTARY_SCHOOL
+            elif re.search(r"(?<!junior )\bhigh\b", school_name):  # the lookbehind rejects "junior high"
+                return AmenityKind.HIGH_SCHOOL
+    if node_or_way.find("tag[@k='leisure'][@v='playground']") is not None:  # reached by schools whose name matched neither pattern as well
+        access_tag = node_or_way.find("tag[@k='access']")
+        if access_tag is None or access_tag.attrib["v"] not in ["private", "customers", "no"]:  # playgrounds with one of these access values are skipped
+            return AmenityKind.PLAYGROUND
+    return None  # no rule matched
 
 
 node_locations = {}
@@ -44,4 +127,8 @@ for (event_kind, el) in ElementTree.iterparse(stdin, events=["end"]):
                 lon = (min(lons) + max(lons)) / 2
                 locations_by_amenity[classification].append((float(lat), float(lon)))
 
-print(json.dumps(locations_by_amenity))
+if missing := set(AmenityKind) - locations_by_amenity.keys():  # enum members that are not keys of locations_by_amenity
+    display = sorted(kind.name for kind in missing)  # sorted so the warning text is deterministic
+    warn(f"Amenities not found: {display}")  # issued via warnings.warn; the JSON below is still printed unless warnings are configured to raise
+
+print(json.dumps({kind.name: locs for (kind, locs) in locations_by_amenity.items()}))  # JSON keys are the enum member names (e.g. "GROCERY"), not the .value labels