From: Andrea Cornell Date: Sat, 23 Sep 2023 03:09:45 +0000 (-0400) Subject: code jumble X-Git-Url: https://jcornell.net/gitweb/gitweb.cgi?a=commitdiff_plain;h=44267a0b1cdb299cf4bb695ad5639f95449be10a;p=counting.git code jumble this is so that Jakob can use UniqHtmlElement for testing the number parsing --- diff --git a/sharedmodel/src/html.rs b/sharedmodel/src/html.rs new file mode 100644 index 0000000..a585349 --- /dev/null +++ b/sharedmodel/src/html.rs @@ -0,0 +1,16 @@ +use std::borrow::Borrow; + +#[derive(Debug)] +pub enum HtmlNode { + Text(T), + Element(U) +} + +pub trait HtmlElement: Sized { + type Text1: AsRef; + type Text2: AsRef; + type ContentsIter: DoubleEndedIterator>; + fn name(&self) -> &str; + fn contents(self) -> Self::ContentsIter; + fn text(&self) -> Self::Text2; +} diff --git a/sharedmodel/src/html_html5ever.rs b/sharedmodel/src/html_html5ever.rs new file mode 100644 index 0000000..9afc890 --- /dev/null +++ b/sharedmodel/src/html_html5ever.rs @@ -0,0 +1,200 @@ +use std::cell::{Cell, RefCell}; +use std::rc::{Rc, Weak}; +use std::borrow::Cow; + +use html5ever::tree_builder::{TreeSink, ElementFlags}; +use html5ever::{QualName, ExpandedName, LocalNameStaticSet, Attribute}; +use html5ever::tendril::{StrTendril, TendrilSink}; +use html5ever::interface::{NodeOrText, QuirksMode}; +use html5ever::driver::ParseOpts; +use string_cache::Atom; + +use crate::html::{HtmlNode, HtmlElement}; + +type RcNode<'a> = HtmlNode>; + +pub fn parse_html(html: &str) -> UniqHtmlElement { + let mut parser = html5ever::parse_fragment(RcHtmlElement(Rc::new(HtmlElementInner::root())), ParseOpts::default(), QualName::new(None, Atom::from("http://www.w3.org/1999/xhtml"), Atom::from("body")), Vec::new()); + parser.process(StrTendril::from(html)); + parser.finish() +} + +// #[derive(Debug)] +struct HtmlElementInner<'a> { + name: QualName, + parent: Cell>>, + contents: RefCell>> +} + +impl HtmlElementInner<'_> { + fn root() -> Self { + let name = QualName::new(None, Atom::from(""), Atom::from("")); + let parent = Cell::new(Weak::new()); + let contents = RefCell::new(Vec::new()); + HtmlElementInner { name, parent, contents } + } +} + +//#[derive(Clone, Debug)] +#[derive(Clone)] +struct RcHtmlElement<'a>(Rc>); + +fn nodeortext_to_htmlnode<'a>(v: NodeOrText>) -> RcNode<'a> { + match v { + NodeOrText::AppendNode(h) => HtmlNode::Element(h), + NodeOrText::AppendText(s) => HtmlNode::Text(s) + } +} + +fn node_eq_elem<'a>(node: &RcNode<'a>, elem: &RcHtmlElement<'a>) -> bool { + match node { + HtmlNode::Element(elem1) if Rc::ptr_eq(&elem.0, &elem1.0) => true, + _ => false + } +} + +impl<'a> TreeSink for RcHtmlElement<'a> { + type Handle = Self; + type Output = UniqHtmlElement; + fn finish(self) -> UniqHtmlElement { + // let HtmlElementInner { name, parent, contents } = Rc::try_unwrap(self.0).map_err(|_| ()).expect("no refs left around"); + let HtmlElementInner { ref name, ref contents, .. } = *self.0; + let name = name.local.clone(); + let contents = contents.borrow_mut().drain(..).map(|node| { + match node { + HtmlNode::Text(s) => HtmlNode::Text(s), + HtmlNode::Element(e) => HtmlNode::Element(e.finish()) + } + }).collect(); + UniqHtmlElement { name, contents } + } + + fn parse_error(&mut self, msg: Cow<'static, str>) { + println!("{}", msg); + panic!("how do parse??"); + } + + fn get_document(&mut self) -> Self::Handle { + self.clone() + } + + fn elem_name<'b>(&'b self, target: &'b Self::Handle) -> ExpandedName<'b> { + target.0.name.expanded() + } + + fn create_element(&mut self, name: QualName, attrs: Vec, flags: ElementFlags) -> Self::Handle { + assert_eq!(flags.template, false); + assert_eq!(flags.mathml_annotation_xml_integration_point, false); + let contents = RefCell::new(Vec::new()); + let parent = Cell::new(Weak::new()); + // handily, we never use attribute data. Toss it + RcHtmlElement(Rc::new(HtmlElementInner { name, parent, contents })) + } + + fn create_comment(&mut self, text: StrTendril) -> Self::Handle { + panic!("HTML parser encountered a comment"); + } + + fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle { + panic!("HTML parser encountered a processing instruction"); + } + + fn append(&mut self, parent: &Self::Handle, child: NodeOrText) { + let node = nodeortext_to_htmlnode(child); + if let HtmlNode::Element(ref elem) = node { + elem.0.parent.set(Rc::downgrade(&parent.0)); + } + parent.0.contents.borrow_mut().push(node); + } + + fn append_based_on_parent_node(&mut self, element: &Self::Handle, prev_element: &Self::Handle, child: NodeOrText) { + panic!("Not clear what this is for or if it needs to be implemented"); + } + + fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril) { + panic!("This can likely be a no-op, just not sure if it gets called for these fragments"); + } + + fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle { + panic!("HTML parser encountered a