Number parsing refactors and minor changes
author Jakob Cornell <jakob+gpg@jcornell.net>
Sun, 1 Oct 2023 18:47:07 +0000 (13:47 -0500)
committer Jakob Cornell <jakob+gpg@jcornell.net>
Sun, 1 Oct 2023 18:47:07 +0000 (13:47 -0500)
Among other things, this changes the part of Rust number parsing that reprocesses digit groups
non-eagerly; now it slices into the full parts vector rather than copying.

sharedmodel/src/update_parse.rs
strikebot/strikebot/src/strikebot/updates.py

index 728c01fb0b29b6b709f68cee3a75702130a6930b..974e75f49b4351ca98a9c5046f7339ee16d94549 100644 (file)
@@ -1,4 +1,5 @@
 use std::borrow::Borrow;
+use std::cmp::max;
 use std::collections::{BTreeSet, VecDeque};
 use std::slice;
 
@@ -26,11 +27,12 @@ pub enum ParseError {
 fn parse_command(line: &str, bot_user: &str) -> Option<Command> {
        let lower_line = line.to_lowercase();
        if lower_line == format!("/u/{} reset", bot_user) {
-               return Some(Command::Reset);
-       } else if lower_line == "sidebar count" || lower_line == "current count" {
-               return Some(Command::Report);
+               Some(Command::Reset)
        } else {
-               return None;
+               match lower_line.as_str() {
+                       "sidebar count" | "current count" => Some(Command::Report),
+                       _ => None,
+               }
        }
 }
 
@@ -41,13 +43,13 @@ pub fn parse_update(
        bot_user: &str
 ) -> Result<ParsedUpdate, ParseError> {
 
-       // TextNode is HtmlElementImpl::Node containing HtmlNode::Text
+       // TextNode is type(body)::Node containing HtmlNode::Text
        enum Text<TextNode> {
                FromTree(TextNode),
                Other(String),
        }
 
-       // Node is HtmlElementImpl::Node
+       // Node is type(body)::Node
        enum WorklistEntry<Node> {
                Space,
                NewLine,
@@ -236,20 +238,20 @@ fn parse_from_lines(lines: &[impl Borrow<str>], curr_count: Option<Count>, bot_u
                                deletable = lone;
                        } else {
                                let mut groups_okay = all_parts_valid;
-                               let mut use_parts = &parts;
+                               let mut use_parts = &parts[..];
                                if let Some(count_val) = curr_count {
                                        if sep_opt.is_some() && sep_opt.unwrap().chars().all(char::is_whitespace) {
                                                // Presume that the intended count consists of as many valid digit groups as
                                                // necessary to match the number of digits in the expected count, if
                                                // possible.
                                                let digit_count = format!("{}", count_val.abs()).len();
-                                               let mut use_parts = vec![];
-                                               let mut accum = 0;
+                                               let mut total_len = 0;
+                                               let mut part_count = 0;
                                                for (i, part) in parts.iter().cloned().enumerate() {
                                                        let part_valid = if i == 0 { part.len() <= 3 } else { part.len() == 3 };
-                                                       if part_valid && accum < digit_count {
-                                                               use_parts.push(part);
-                                                               accum += part.len();
+                                                       if part_valid && total_len < digit_count {
+                                                               total_len += part.len();
+                                                               part_count += 1;
                                                        } else {
                                                                break;
                                                        }
@@ -257,10 +259,9 @@ fn parse_from_lines(lines: &[impl Borrow<str>], curr_count: Option<Count>, bot_u
 
                                                // Could still be a no-separator count with some extra digit groups on the
                                                // same line.
-                                               if use_parts.is_empty() {
-                                                       use_parts = vec![parts[0]];
-                                               }
+                                               part_count = max(part_count, 1);
 
+                                               use_parts = &parts[..part_count];
                                                lone = lone && use_parts.len() == parts.len();
 
                                                // Validated by regex as only ASCII digits, leading zeros stripped.
index 15c84727f5d9b6779abb6c5dfec344528fba7a78..afeca2b01a95ba5a6db5b243c128da5137f4e376 100644 (file)
@@ -143,7 +143,6 @@ def _parse_from_lines(lines: list[str], curr_count: Optional[int], bot_user: str
                                count_attempt = True
                                deletable = lone
                        else:
-                               groups_okay = True
                                if curr_count is not None and sep and sep.isspace():
                                        # Presume that the intended count consists of as many valid digit groups as
                                        # necessary to match the number of digits in the expected count, if possible.
@@ -164,6 +163,9 @@ def _parse_from_lines(lines: list[str], curr_count: Optional[int], bot_user: str
                                                use_parts = [parts[0]]
 
                                        lone = lone and len(use_parts) == len(parts)
+
+                                       # Validated by regex as only ASCII digits, leading zeros stripped.
+                                       groups_okay = True
                                else:
                                        # current count is unknown, or any detected separator unambiguously delineates
                                        # the number