From 44267a0b1cdb299cf4bb695ad5639f95449be10a Mon Sep 17 00:00:00 2001 From: Andrea Cornell Date: Fri, 22 Sep 2023 23:09:45 -0400 Subject: [PATCH] code jumble this is so that Jakob can use UniqHtmlElement for testing the number parsing --- sharedmodel/src/{html_nodes.rs => html.rs} | 0 sharedmodel/src/html_html5ever.rs | 200 +++++++++++++++++++++ sharedmodel/src/lib.rs | 6 +- sharedmodel/src/live_event_buffer.rs | 2 +- sharedmodel/src/main.rs | 198 +------------------- 5 files changed, 207 insertions(+), 199 deletions(-) rename sharedmodel/src/{html_nodes.rs => html.rs} (100%) create mode 100644 sharedmodel/src/html_html5ever.rs diff --git a/sharedmodel/src/html_nodes.rs b/sharedmodel/src/html.rs similarity index 100% rename from sharedmodel/src/html_nodes.rs rename to sharedmodel/src/html.rs diff --git a/sharedmodel/src/html_html5ever.rs b/sharedmodel/src/html_html5ever.rs new file mode 100644 index 0000000..9afc890 --- /dev/null +++ b/sharedmodel/src/html_html5ever.rs @@ -0,0 +1,200 @@ +use std::cell::{Cell, RefCell}; +use std::rc::{Rc, Weak}; +use std::borrow::Cow; + +use html5ever::tree_builder::{TreeSink, ElementFlags}; +use html5ever::{QualName, ExpandedName, LocalNameStaticSet, Attribute}; +use html5ever::tendril::{StrTendril, TendrilSink}; +use html5ever::interface::{NodeOrText, QuirksMode}; +use html5ever::driver::ParseOpts; +use string_cache::Atom; + +use crate::html::{HtmlNode, HtmlElement}; + +type RcNode<'a> = HtmlNode>; + +pub fn parse_html(html: &str) -> UniqHtmlElement { + let mut parser = html5ever::parse_fragment(RcHtmlElement(Rc::new(HtmlElementInner::root())), ParseOpts::default(), QualName::new(None, Atom::from("http://www.w3.org/1999/xhtml"), Atom::from("body")), Vec::new()); + parser.process(StrTendril::from(html)); + parser.finish() +} + +// #[derive(Debug)] +struct HtmlElementInner<'a> { + name: QualName, + parent: Cell>>, + contents: RefCell>> +} + +impl HtmlElementInner<'_> { + fn root() -> Self { + let name = QualName::new(None, Atom::from(""), Atom::from("")); + let parent = Cell::new(Weak::new()); + let contents = RefCell::new(Vec::new()); + HtmlElementInner { name, parent, contents } + } +} + +//#[derive(Clone, Debug)] +#[derive(Clone)] +struct RcHtmlElement<'a>(Rc>); + +fn nodeortext_to_htmlnode<'a>(v: NodeOrText>) -> RcNode<'a> { + match v { + NodeOrText::AppendNode(h) => HtmlNode::Element(h), + NodeOrText::AppendText(s) => HtmlNode::Text(s) + } +} + +fn node_eq_elem<'a>(node: &RcNode<'a>, elem: &RcHtmlElement<'a>) -> bool { + match node { + HtmlNode::Element(elem1) if Rc::ptr_eq(&elem.0, &elem1.0) => true, + _ => false + } +} + +impl<'a> TreeSink for RcHtmlElement<'a> { + type Handle = Self; + type Output = UniqHtmlElement; + fn finish(self) -> UniqHtmlElement { + // let HtmlElementInner { name, parent, contents } = Rc::try_unwrap(self.0).map_err(|_| ()).expect("no refs left around"); + let HtmlElementInner { ref name, ref contents, .. } = *self.0; + let name = name.local.clone(); + let contents = contents.borrow_mut().drain(..).map(|node| { + match node { + HtmlNode::Text(s) => HtmlNode::Text(s), + HtmlNode::Element(e) => HtmlNode::Element(e.finish()) + } + }).collect(); + UniqHtmlElement { name, contents } + } + + fn parse_error(&mut self, msg: Cow<'static, str>) { + println!("{}", msg); + panic!("how do parse??"); + } + + fn get_document(&mut self) -> Self::Handle { + self.clone() + } + + fn elem_name<'b>(&'b self, target: &'b Self::Handle) -> ExpandedName<'b> { + target.0.name.expanded() + } + + fn create_element(&mut self, name: QualName, attrs: Vec, flags: ElementFlags) -> Self::Handle { + assert_eq!(flags.template, false); + assert_eq!(flags.mathml_annotation_xml_integration_point, false); + let contents = RefCell::new(Vec::new()); + let parent = Cell::new(Weak::new()); + // handily, we never use attribute data. Toss it + RcHtmlElement(Rc::new(HtmlElementInner { name, parent, contents })) + } + + fn create_comment(&mut self, text: StrTendril) -> Self::Handle { + panic!("HTML parser encountered a comment"); + } + + fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle { + panic!("HTML parser encountered a processing instruction"); + } + + fn append(&mut self, parent: &Self::Handle, child: NodeOrText) { + let node = nodeortext_to_htmlnode(child); + if let HtmlNode::Element(ref elem) = node { + elem.0.parent.set(Rc::downgrade(&parent.0)); + } + parent.0.contents.borrow_mut().push(node); + } + + fn append_based_on_parent_node(&mut self, element: &Self::Handle, prev_element: &Self::Handle, child: NodeOrText) { + panic!("Not clear what this is for or if it needs to be implemented"); + } + + fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril) { + panic!("This can likely be a no-op, just not sure if it gets called for these fragments"); + } + + fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle { + panic!("HTML parser encountered a