--- /dev/null
+use std::borrow::Borrow;
+use std::collections::{BTreeSet, VecDeque};
+use std::slice;
+
+use anyhow::Context;
+
+use crate::html::{HtmlElement, HtmlNode};
+
+/// Integer type used for count values throughout this module (signed 64-bit).
+type Count = i64;
+
+/// A bot command recognized in a line of update text by `parse_command`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Command {
+    /// Produced for a line of the form `/u/<bot_user> reset`.
+    Reset,
+    /// Produced for the line `sidebar count` or `current count`.
+    Report,
+}
+
+/// Result of parsing one update, as produced by `parse_from_lines`.
+#[derive(Debug, PartialEq)]
+pub struct ParsedUpdate {
+    /// The count value read from the first line, when one was accepted.
+    number: Option<Count>,
+    /// The first command found on any line of the update, if any.
+    command: Option<Command>,
+    /// Whether the first line was judged to be an attempt at posting a count.
+    count_attempt: bool,
+    /// Flag computed by the parser; presumably marks updates the bot may
+    /// delete -- NOTE(review): confirm against the caller.
+    deletable: bool,
+}
+
+pub enum ParseError {
+ InvalidElement(String),
+}
+
+/// Recognizes a bot command in a single line of update text.
+///
+/// Matching is case-insensitive and requires the whole line to be the
+/// command:
+/// * `/u/<bot_user> reset` yields `Command::Reset`
+/// * `sidebar count` or `current count` yields `Command::Report`
+///
+/// Returns `None` for any other line.
+fn parse_command(line: &str, bot_user: &str) -> Option<Command> {
+    let lower_line = line.to_lowercase();
+    // The line is lowercased, so the user name must be lowercased as well;
+    // otherwise a bot name containing capitals could never match.
+    let reset_line = format!("/u/{} reset", bot_user.to_lowercase());
+
+    if lower_line == reset_line {
+        Some(Command::Reset)
+    } else if lower_line == "sidebar count" || lower_line == "current count" {
+        Some(Command::Report)
+    } else {
+        None
+    }
+}
+
+/// Flattens the HTML `body` of an update into whitespace-normalized text
+/// lines and parses them with `parse_from_lines`.
+///
+/// `curr_count` is the next number up, one more than the last count.
+/// `bot_user` is the bot's user name, used to recognize the reset command.
+///
+/// Block-level elements and `br`/`hr` start a new line, inline elements are
+/// flattened into the current line, `pre` contributes one line per `\n`, and
+/// the cells of a `tr` are separated by a single space. Lines that are empty
+/// after whitespace normalization are dropped.
+///
+/// # Errors
+///
+/// Returns `ParseError::InvalidElement` carrying the tag name when the body
+/// contains an element that is not in the handled set.
+///
+/// # Panics
+///
+/// Panics if a non-text node is found among the collected text fragments,
+/// which would be an internal logic error.
+pub fn parse_update(
+    body: impl HtmlElement,
+    curr_count: Option<Count>,
+    bot_user: &str
+) -> Result<ParsedUpdate, ParseError> {
+
+    // TextNode is HtmlElementImpl::Node containing HtmlNode::Text
+    enum Text<TextNode> {
+        FromTree(TextNode),
+        Other(String),
+    }
+
+    // Node is HtmlElementImpl::Node
+    enum WorklistEntry<Node> {
+        Space,
+        NewLine,
+        Node(Node),
+    }
+
+    // flatten the update content to plain text
+    let mut worklist: VecDeque<_> = body.contents().map(WorklistEntry::Node).collect();
+    // The worklist is consumed from the back (it is a stack), and children are
+    // pushed in reverse below so that they come off in document order. The
+    // top-level nodes need the same treatment, otherwise the last top-level
+    // node would be processed first.
+    worklist.make_contiguous().reverse();
+    let mut out = vec![vec![]];
+
+    while let Some(entry) = worklist.pop_back() {
+        match entry {
+            WorklistEntry::Space => {
+                out.last_mut().unwrap().push(Text::Other(" ".to_string()));
+            },
+            WorklistEntry::NewLine => {
+                // only break the line if something was written to it
+                if !out.last().unwrap().is_empty() {
+                    out.push(vec![]);
+                }
+            },
+            WorklistEntry::Node(node) => match node {
+                HtmlNode::Element(element) => match element.name() {
+                    "br" | "hr" => {
+                        if !out.last().unwrap().is_empty() {
+                            out.push(vec![]);
+                        }
+                    },
+                    // inline (or purely structural) elements: descend in place
+                    "em" | "strong" | "del" | "span" | "sup" | "code" | "a" | "th" | "td"
+                    | "ul" | "ol" | "table" | "thead" | "tbody" => {
+                        worklist.extend(element.contents().rev().map(WorklistEntry::Node));
+                    },
+                    // block elements: content sits on its own line(s)
+                    "li" | "p" | "div" | "blockquote" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
+                        worklist.push_back(WorklistEntry::NewLine);
+                        worklist.extend(element.contents().rev().map(WorklistEntry::Node));
+                        worklist.push_back(WorklistEntry::NewLine);
+                    },
+                    "pre" => {
+                        // preformatted text: every source line becomes its own line
+                        out.extend(
+                            element.text().as_ref()
+                                .split('\n').map(str::to_string)
+                                .map(Text::Other).map(|s| vec![s]));
+                        out.push(vec![]);
+                    },
+                    "tr" => {
+                        worklist.push_back(WorklistEntry::NewLine);
+
+                        // Intersperse space markers between the cells. Pushing
+                        // the separator before every cell but the first avoids
+                        // having to pop a trailing marker, which would remove
+                        // the NewLine above when the row has no children.
+                        for (idx, el) in element.contents().rev().enumerate() {
+                            if idx > 0 {
+                                worklist.push_back(WorklistEntry::Space);
+                            }
+                            worklist.push_back(WorklistEntry::Node(el));
+                        }
+
+                        worklist.push_back(WorklistEntry::NewLine);
+                    },
+                    name => return Err(ParseError::InvalidElement(name.to_string())),
+                },
+                HtmlNode::Text(_) => out.last_mut().unwrap().push(Text::FromTree(node)),
+            },
+        }
+    }
+
+    let pre_strip_lines = out.iter()
+        .filter(|line| !line.is_empty())
+        .map(|line|
+            line.iter().map(|text|
+                match text {
+                    Text::FromTree(node) => match node {
+                        HtmlNode::Text(t) => t.as_ref(),
+                        _ => panic!("element in out vec"),
+                    },
+                    Text::Other(t) => t,
+                }).collect::<Vec<_>>()
+            .join("")
+        );
+
+    // normalize whitespace according to HTML rendering rules
+    // https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace#explanation
+    // (the pattern is compiled once instead of once per line)
+    let space_runs = fancy_regex::Regex::new(" +").unwrap();
+    let stripped_lines: Vec<_> = pre_strip_lines
+        .map(|line|
+            space_runs.replace_all(
+                &line.replace('\t', " ").replace('\n', " "),
+                " ")
+            .trim().to_string()
+        )
+        // A line made only of whitespace (e.g. a "\n" text node between two
+        // block elements) renders as nothing; keeping it as "" would distort
+        // both the first-line lookup and the line count used by the parser.
+        .filter(|line| !line.is_empty())
+        .collect();
+
+    Ok(parse_from_lines(&stripped_lines, curr_count, bot_user))
+}
+
+/// Parses the whitespace-normalized text lines of an update.
+///
+/// * `command` is the first command found on any line (see `parse_command`).
+/// * The count is looked for at the start of the first line only: an optional
+///   `v`, an optional `-`, then groups of ASCII digits joined by one uniform
+///   separator (`,`, `.`, space, thin space or `, `).
+/// * `curr_count`, when known, is used to detect typos of the expected count,
+///   to decide how many space-separated digit groups belong to the count, and
+///   to judge whether a number is close enough to count as an attempt.
+///
+/// With no lines at all the result is "no number, no attempt, deletable".
+/// A digit run too large for `Count` is reported as an attempt without a
+/// number instead of panicking.
+///
+/// # Panics
+///
+/// Panics if the count pattern fails to compile or the regex engine reports
+/// an error while matching the first line.
+fn parse_from_lines(lines: &[impl Borrow<str>], curr_count: Option<Count>, bot_user: &str) -> ParsedUpdate {
+    let command = lines.iter().find_map(|l| parse_command(l.borrow(), bot_user));
+
+    let number_out: Option<Count>;
+    let mut count_attempt;
+    let mut deletable;
+
+    if lines.is_empty() {
+        number_out = None;
+        count_attempt = false;
+        deletable = true;
+    } else {
+        let first_line: &str = lines[0].borrow();
+
+        // look for groups of ASCII digits (as many as possible) separated by a uniform separator
+        // from the valid set
+        let pattern = fancy_regex::Regex::new(
+            "(?-u)^(?P<v>v)?(?P<neg>-)?(?P<num>\\d+((?P<sep>[,. \u{2009}]|, )\\d+((?P=sep)\\d+)*)?)"
+        ).unwrap();
+        let match_result = pattern
+            .captures(first_line)
+            .with_context(|| format!("parsing line {:?}", first_line))
+            .unwrap();
+        if let Some(ref match_) = match_result {
+            let sep_opt = match_.name("sep").map(|m| m.as_str());
+            let is_negative = match_.name("neg").is_some();
+            // everything on the first line after the matched number
+            let post = &first_line[match_.get(0).unwrap().end()..];
+
+            // "within 25 of the expected count", computed without risking
+            // overflow on extreme inputs; false when the count is unknown
+            let near_count = |n: Count| curr_count
+                .and_then(|c| n.checked_sub(c))
+                .map_or(false, |d| d.unsigned_abs() <= 25);
+
+            // strip leading zeros
+            let (raw_digits, zeros) = {
+                let mut zeros = false;
+                let mut raw_digits = &match_["num"];
+                while raw_digits.len() > 1 && raw_digits.starts_with('0') {
+                    zeros = true;
+                    raw_digits = raw_digits.strip_prefix('0').unwrap();
+                    if let Some(sep) = sep_opt {
+                        raw_digits = raw_digits.strip_prefix(sep).unwrap_or(raw_digits);
+                    }
+                }
+                (raw_digits, zeros)
+            };
+
+            let parts: Vec<&str> = match sep_opt {
+                Some(sep) => raw_digits.split(sep).collect(),
+                None => vec![raw_digits],
+            };
+            // TODO look into cleaner way to handle parts below (char slices?)
+            let raw_parts: Vec<&[u8]> = parts.iter().cloned().map(str::as_bytes).collect();
+
+            // with a separator: first group has 1-3 digits, every other group exactly 3
+            let all_parts_valid = sep_opt.is_none()
+                || ((1..=3).contains(&raw_parts[0].len()) && raw_parts[1..].iter().all(|p| p.len() == 3));
+            // the number is the only content of the update
+            let mut lone = lines.len() == 1 && post.chars().all(char::is_whitespace);
+
+            let has_v = match_.name("v").is_some();
+            let typo =
+                // failed paste of leading digits
+                (has_v && raw_parts.len() == 1 && raw_parts[0].len() <= 2)
+                || (has_v && all_parts_valid) // v followed by count
+                || match curr_count {
+                    Some(count_val) => {
+                        count_val.abs() >= 100 && is_negative == (count_val < 0) && {
+                            let goal_string = count_val.abs().to_string();
+                            // expected count split into groups of three from the right
+                            let goal_parts: Vec<&[u8]> = goal_string.as_bytes().rchunks(3).rev().collect();
+                            let (&last, front) = goal_parts.split_last().unwrap();
+                            let partials: Vec<Vec<&[u8]>> = vec![
+                                // missing last digit
+                                front.iter().cloned().chain([last.get(..last.len() - 1).unwrap()]).collect(),
+
+                                // missing last two digits
+                                front.iter().cloned().chain([last.get(..last.len() - 2).unwrap()]).collect(),
+
+                                // missing second-last digit
+                                front.iter().cloned().chain([
+                                    last.get(..last.len() - 2).unwrap(),
+                                    slice::from_ref(&last[last.len() - 1]),
+                                ]).collect(),
+                            ];
+
+                            // missing any of last two digits or double paste
+                            partials.contains(&raw_parts)
+                                || partials.iter().any(
+                                    |p| raw_parts == p[..p.len() - 1].iter().cloned()
+                                        .chain([p[p.len() - 1], goal_parts[0]])
+                                        .chain(goal_parts[1..].iter().cloned())
+                                        .collect::<Vec<_>>()
+                                )
+                        }
+                    },
+                    None => false
+                };
+            if has_v || zeros || typo || (parts == ["0"] && is_negative) {
+                number_out = None;
+                count_attempt = true;
+                deletable = lone;
+            } else {
+                let groups_okay = all_parts_valid;
+                let mut use_parts = parts.clone();
+                if let Some(count_val) = curr_count {
+                    if sep_opt.map_or(false, |sep| sep.chars().all(char::is_whitespace)) {
+                        // Presume that the intended count consists of as many valid digit groups as
+                        // necessary to match the number of digits in the expected count, if
+                        // possible.
+                        let digit_count = count_val.abs().to_string().len();
+                        let mut chosen = vec![];
+                        let mut accum = 0;
+                        for (i, part) in parts.iter().cloned().enumerate() {
+                            let part_valid = if i == 0 { part.len() <= 3 } else { part.len() == 3 };
+                            if part_valid && accum < digit_count {
+                                chosen.push(part);
+                                accum += part.len();
+                            } else {
+                                break;
+                            }
+                        }
+
+                        // Could still be a no-separator count with some extra digit groups on the
+                        // same line.
+                        if chosen.is_empty() {
+                            chosen = vec![parts[0]];
+                        }
+
+                        lone = lone && chosen.len() == parts.len();
+                        // Assign to the outer binding: declaring a new
+                        // `use_parts` here would only shadow it, and the
+                        // selection would never reach the code below.
+                        use_parts = chosen;
+                    }
+                }
+
+                let digits = use_parts.join("");
+                // `None` when the digit run does not fit in `Count`
+                let parsed: Option<Count> = digits.parse::<Count>().ok()
+                    .map(|magnitude| if is_negative { -magnitude } else { magnitude });
+                match parsed {
+                    Some(number) if groups_okay => {
+                        let special = near_count(number) && is_special(number);
+                        deletable = lone && !special;
+                        // text glued directly onto the complete number
+                        let glued = post.chars().next().map_or(false, |c| !c.is_whitespace());
+                        if use_parts.len() == parts.len() && glued {
+                            count_attempt = near_count(number);
+                            number_out = None;
+                        } else {
+                            count_attempt = true;
+                            number_out = Some(number);
+                        }
+                    },
+                    // malformed digit groups, or a number too large to represent
+                    _ => {
+                        number_out = None;
+                        count_attempt = true;
+                        deletable = false;
+                    },
+                }
+            }
+
+            // TODO consider integrating this into logic above
+            if first_line.starts_with(|c: char| c.is_ascii_digit()) && !count_attempt {
+                count_attempt = true;
+                deletable = false;
+            }
+        } else {
+            // no count attempt found
+            number_out = None;
+            count_attempt = false;
+            deletable = false;
+        }
+    }
+    ParsedUpdate {
+        number: number_out,
+        command,
+        count_attempt,
+        deletable,
+    }
+}
+
+/// Reports whether `count` is a "special" number.
+///
+/// A count is special when its magnitude
+/// * ends in `000`, `001`, `333` or `999`, or
+/// * is greater than 10,000,000 and reads the same in both directions, or
+/// * consists of one digit sequence repeated two or more times
+///   (e.g. `77`, `1212`, `12341234`).
+///
+/// The sign is ignored, so `-999` is treated like `999`. Working on the raw
+/// signed value would make `%` yield negative remainders and put a `-` into
+/// the digit string, so negative counts would be classified inconsistently.
+fn is_special(count: Count) -> bool {
+    // `unsigned_abs` cannot overflow, unlike `abs` on the minimum value
+    let magnitude = count.unsigned_abs();
+    let digits = magnitude.to_string();
+
+    [0, 1, 333, 999].contains(&(magnitude % 1000))
+        // palindrome
+        || (magnitude > 10_000_000 && digits.chars().rev().collect::<String>() == digits)
+        // repeated sequence: all equal-sized chunks are identical
+        || (1..=digits.len() / 2).any(
+            |len| digits.len() % len == 0
+                && digits.as_bytes().chunks(len).collect::<BTreeSet<_>>().len() == 1)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Each row is (line, bot user, expected command).
+    #[test]
+    fn test_parse_command() {
+        let cases = [
+            ("/U/foo reset", "foo", Some(Command::Reset)),
+            ("current count", "", Some(Command::Report)),
+            ("foo", "", None),
+        ];
+        for (line, bot_user, expected) in cases {
+            assert_eq!(parse_command(line, bot_user), expected);
+        }
+    }
+
+    /// Counts that must be special, followed by one that must not be.
+    #[test]
+    fn test_is_special() {
+        for count in [10_333, 10_000_001, 12341234] {
+            assert!(is_special(count), "{} should be special", count);
+        }
+        assert!(!is_special(1937592));
+    }
+}