From: Jakob Cornell Date: Sun, 24 May 2026 04:52:24 +0000 (-0500) Subject: Finish amenity extraction tool X-Git-Url: https://jcornell.net/gitweb/gitweb.cgi?a=commitdiff_plain;h=a523f75ae91e98826e6e96d9a4b2a2da4183582b;p=ntbd-parcels.git Finish amenity extraction tool --- diff --git a/extract_amenities.py b/extract_amenities.py index 3502de1..4a54d97 100644 --- a/extract_amenities.py +++ b/extract_amenities.py @@ -7,18 +7,101 @@ in the output. from collections import defaultdict from decimal import Decimal +from enum import Enum from xml.etree import ElementTree from sys import stdin from typing import Optional +from warnings import warn import json +import re -def classify(node_or_way: ElementTree.Element) -> Optional[str]: - # TODO: finish this - if node_or_way.find("tag[@k='shop'][@v='supermarket']") is not None: - return "grocery" - else: - return None +class AmenityKind(Enum): + GROCERY = "grocery" + COFFEE = "coffee shop" + BREWERY = "brewery" + WINE_BAR = "wine bar" + PIZZA = "pizza" + BAKERY = "bakery" + HARDWARE = "hardware store" + BOOK_STORE = "book store" + BARBER = "barber/salon" + POST_OFFICE = "post office" + LIBRARY = "library" + PHARMACY = "pharmacy" + DENTIST = "dentist" + DOCTOR = "doctor" + MARKET = "farmer's market" + GARDEN = "community garden" + EMERGENCY = "emergency department" + ELEMENTARY_SCHOOL = "elementary school" + HIGH_SCHOOL = "high school" + PLAYGROUND = "playground" + + +def classify(node_or_way: ElementTree.Element) -> Optional[AmenityKind]: + if (shop_tag := node_or_way.find("tag[@k='shop']")) is not None: + match shop_tag.attrib["v"]: + case "supermarket" | "greengrocer": + return AmenityKind.GROCERY + case "coffee": + return AmenityKind.COFFEE + case "bakery" | "pastry": + return AmenityKind.BAKERY + case "hardware" | "doityourself": + return AmenityKind.HARDWARE + case "books": + return AmenityKind.BOOK_STORE + case "hairdresser": + return AmenityKind.BARBER + if (amenity_tag := node_or_way.find("tag[@k='amenity']")) is not None: + match amenity_tag.attrib["v"]: + case "cafe": + return AmenityKind.COFFEE + case "post_office": + return AmenityKind.POST_OFFICE + case "library": + return AmenityKind.LIBRARY + case "pharmacy": + return AmenityKind.PHARMACY + case "dentist": + return AmenityKind.DENTIST + case "doctors" | "clinic": + return AmenityKind.DOCTOR + case "marketplace": + return AmenityKind.MARKET + if (node_or_way.find("tag[@k='craft'][@v='brewery']") is not None + or node_or_way.find("tag[@k='microbrewery'][@v='yes']") is not None): + return AmenityKind.BREWERY + if node_or_way.find("tag[@k='drink:wine'][@v='yes']") is not None: + return AmenityKind.WINE_BAR + if node_or_way.find("tag[@k='cuisine'][@v='pizza']") is not None: + return AmenityKind.PIZZA + if node_or_way.find("tag[@k='hairdresser'][@v='barber']") is not None: + return AmenityKind.BARBER + if node_or_way.find("tag[@k='healthcare'][@v='dentist']") is not None: + return AmenityKind.DENTIST + if node_or_way.find("tag[@k='healthcare'][@v='doctor']") is not None: + return AmenityKind.DOCTOR + if (node_or_way.find("tag[@k='landuse'][@v='allotments']") is not None + or node_or_way.find("tag[@k='garden:type'][@v='community']") is not None): + return AmenityKind.GARDEN + if (node_or_way.find("tag[@v='hospital']") is not None + and node_or_way.find("tag[@k='emergency'][@v='yes']") is not None): + return AmenityKind.EMERGENCY + if (node_or_way.find("tag[@k='amenity'][@v='school']") is not None + or node_or_way.find("tag[@k='building'][@v='school']") is not None): + if (name_tag := node_or_way.find("tag[@k='name']")) is not None: + school_name = name_tag.attrib["v"].lower() + if re.search(r"\belementary\b", school_name): + return AmenityKind.ELEMENTARY_SCHOOL + elif re.search(r"(?