From 3565d02165d86002bc8d768e1f83a83732db0ce5 Mon Sep 17 00:00:00 2001 From: Jakob Cornell Date: Sun, 1 Oct 2023 13:47:07 -0500 Subject: [PATCH] Number parsing refactors and minor changes Among other things, this changes the part of Rust number parsing that reprocesses digit groups non-eagerly; now it slices into the full parts vector rather than copying. --- sharedmodel/src/update_parse.rs | 31 ++++++++++---------- strikebot/strikebot/src/strikebot/updates.py | 4 ++- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/sharedmodel/src/update_parse.rs b/sharedmodel/src/update_parse.rs index 728c01f..974e75f 100644 --- a/sharedmodel/src/update_parse.rs +++ b/sharedmodel/src/update_parse.rs @@ -1,4 +1,5 @@ use std::borrow::Borrow; +use std::cmp::max; use std::collections::{BTreeSet, VecDeque}; use std::slice; @@ -26,11 +27,12 @@ pub enum ParseError { fn parse_command(line: &str, bot_user: &str) -> Option { let lower_line = line.to_lowercase(); if lower_line == format!("/u/{} reset", bot_user) { - return Some(Command::Reset); - } else if lower_line == "sidebar count" || lower_line == "current count" { - return Some(Command::Report); + Some(Command::Reset) } else { - return None; + match lower_line.as_str() { + "sidebar count" | "current count" => Some(Command::Report), + _ => None, + } } } @@ -41,13 +43,13 @@ pub fn parse_update( bot_user: &str ) -> Result { - // TextNode is HtmlElementImpl::Node containing HtmlNode::Text + // TextNode is type(body)::Node containing HtmlNode::Text enum Text { FromTree(TextNode), Other(String), } - // Node is HtmlElementImpl::Node + // Node is type(body)::Node enum WorklistEntry { Space, NewLine, @@ -236,20 +238,20 @@ fn parse_from_lines(lines: &[impl Borrow], curr_count: Option, bot_u deletable = lone; } else { let mut groups_okay = all_parts_valid; - let mut use_parts = &parts; + let mut use_parts = &parts[..]; if let Some(count_val) = curr_count { if sep_opt.is_some() && sep_opt.unwrap().chars().all(char::is_whitespace) { // Presume that the intended count consists of as many valid digit groups as // necessary to match the number of digits in the expected count, if // possible. let digit_count = format!("{}", count_val.abs()).len(); - let mut use_parts = vec![]; - let mut accum = 0; + let mut total_len = 0; + let mut part_count = 0; for (i, part) in parts.iter().cloned().enumerate() { let part_valid = if i == 0 { part.len() <= 3 } else { part.len() == 3 }; - if part_valid && accum < digit_count { - use_parts.push(part); - accum += part.len(); + if part_valid && total_len < digit_count { + total_len += part.len(); + part_count += 1; } else { break; } @@ -257,10 +259,9 @@ fn parse_from_lines(lines: &[impl Borrow], curr_count: Option, bot_u // Could still be a no-separator count with some extra digit groups on the // same line. - if use_parts.is_empty() { - use_parts = vec![parts[0]]; - } + part_count = max(part_count, 1); + use_parts = &parts[..part_count]; lone = lone && use_parts.len() == parts.len(); // Validated by regex as only ASCII digits, leading zeros stripped. diff --git a/strikebot/strikebot/src/strikebot/updates.py b/strikebot/strikebot/src/strikebot/updates.py index 15c8472..afeca2b 100644 --- a/strikebot/strikebot/src/strikebot/updates.py +++ b/strikebot/strikebot/src/strikebot/updates.py @@ -143,7 +143,6 @@ def _parse_from_lines(lines: list[str], curr_count: Optional[int], bot_user: str count_attempt = True deletable = lone else: - groups_okay = True if curr_count is not None and sep and sep.isspace(): # Presume that the intended count consists of as many valid digit groups as # necessary to match the number of digits in the expected count, if possible. @@ -164,6 +163,9 @@ def _parse_from_lines(lines: list[str], curr_count: Optional[int], bot_user: str use_parts = [parts[0]] lone = lone and len(use_parts) == len(parts) + + # Validated by regex as only ASCII digits, leading zeros stripped. + groups_okay = True else: # current count is unknown, or any detected separator unambiguously delineates # the number -- 2.30.2