From 3565d02165d86002bc8d768e1f83a83732db0ce5 Mon Sep 17 00:00:00 2001
From: Jakob Cornell <jakob+gpg@jcornell.net>
Date: Sun, 1 Oct 2023 13:47:07 -0500
Subject: [PATCH] Number parsing refactors and minor changes

Among other things, this changes the part of Rust number parsing that reprocesses digit groups
non-eagerly: the selected groups are now a borrowed slice of the full parts vector rather than
copies collected into a new vector.
---
 sharedmodel/src/update_parse.rs              | 31 ++++++++++----------
 strikebot/strikebot/src/strikebot/updates.py |  4 ++-
 2 files changed, 19 insertions(+), 16 deletions(-)
diff --git a/sharedmodel/src/update_parse.rs b/sharedmodel/src/update_parse.rs
index 728c01f..974e75f 100644
--- a/sharedmodel/src/update_parse.rs
+++ b/sharedmodel/src/update_parse.rs
@@ -1,4 +1,5 @@
 use std::borrow::Borrow;
+use std::cmp::max;
 use std::collections::{BTreeSet, VecDeque};
 use std::slice;
 
@@ -26,11 +27,12 @@ pub enum ParseError {
 fn parse_command(line: &str, bot_user: &str) -> Option<Command> {
 	let lower_line = line.to_lowercase();
 	if lower_line == format!("/u/{} reset", bot_user) {
-		return Some(Command::Reset);
-	} else if lower_line == "sidebar count" || lower_line == "current count" {
-		return Some(Command::Report);
+		Some(Command::Reset)
 	} else {
-		return None;
+		match lower_line.as_str() {
+			"sidebar count" | "current count" => Some(Command::Report),
+			_ => None,
+		}
 	}
 }
 
@@ -41,13 +43,13 @@ pub fn parse_update(
 	bot_user: &str
 ) -> Result<ParsedUpdate, ParseError> {
 
-	// TextNode is HtmlElementImpl::Node containing HtmlNode::Text
+	// TextNode is type(body)::Node containing HtmlNode::Text
 	enum Text<TextNode> {
 		FromTree(TextNode),
 		Other(String),
 	}
 
-	// Node is HtmlElementImpl::Node
+	// Node is type(body)::Node
 	enum WorklistEntry<Node> {
 		Space,
 		NewLine,
@@ -236,20 +238,20 @@ fn parse_from_lines(lines: &[impl Borrow<str>], curr_count: Option<Count>, bot_u
 				deletable = lone;
 			} else {
 				let mut groups_okay = all_parts_valid;
-				let mut use_parts = &parts;
+				let mut use_parts = &parts[..];
 				if let Some(count_val) = curr_count {
 					if sep_opt.is_some() && sep_opt.unwrap().chars().all(char::is_whitespace) {
 						// Presume that the intended count consists of as many valid digit groups as
 						// necessary to match the number of digits in the expected count, if
 						// possible.
 						let digit_count = format!("{}", count_val.abs()).len();
-						let mut use_parts = vec![];
-						let mut accum = 0;
+						let mut total_len = 0;
+						let mut part_count = 0;
 						for (i, part) in parts.iter().cloned().enumerate() {
 							let part_valid = if i == 0 { part.len() <= 3 } else { part.len() == 3 };
-							if part_valid && accum < digit_count {
-								use_parts.push(part);
-								accum += part.len();
+							if part_valid && total_len < digit_count {
+								total_len += part.len();
+								part_count += 1;
 							} else {
 								break;
 							}
@@ -257,10 +259,9 @@ fn parse_from_lines(lines: &[impl Borrow<str>], curr_count: Option<Count>, bot_u
 
 						// Could still be a no-separator count with some extra digit groups on the
 						// same line.
-						if use_parts.is_empty() {
-							use_parts = vec![parts[0]];
-						}
+						part_count = max(part_count, 1);
 
+						use_parts = &parts[..part_count];
 						lone = lone && use_parts.len() == parts.len();
 
 						// Validated by regex as only ASCII digits, leading zeros stripped.
diff --git a/strikebot/strikebot/src/strikebot/updates.py b/strikebot/strikebot/src/strikebot/updates.py
index 15c8472..afeca2b 100644
--- a/strikebot/strikebot/src/strikebot/updates.py
+++ b/strikebot/strikebot/src/strikebot/updates.py
@@ -143,7 +143,6 @@ def _parse_from_lines(lines: list[str], curr_count: Optional[int], bot_user: str
 				count_attempt = True
 				deletable = lone
 			else:
-				groups_okay = True
 				if curr_count is not None and sep and sep.isspace():
 					# Presume that the intended count consists of as many valid digit groups as
 					# necessary to match the number of digits in the expected count, if possible.
@@ -164,6 +163,9 @@ def _parse_from_lines(lines: list[str], curr_count: Optional[int], bot_user: str
 						use_parts = [parts[0]]
 
 					lone = lone and len(use_parts) == len(parts)
+
+					# Validated by regex as only ASCII digits, leading zeros stripped.
+					groups_okay = True
 				else:
 					# current count is unknown, or any detected separator unambiguously delineates
 					# the number
-- 
2.30.2