From 3e6bad885e057e26f95b64987a203e37818ef912 Mon Sep 17 00:00:00 2001 From: Andrea Cornell Date: Thu, 28 Sep 2023 14:05:50 -0400 Subject: [PATCH] UniqHtmlElement: don't wrap the root element * When parsing a fragment, html5ever will (apparently) generate an root element if the fragment doesn't already look like that. So, it's unhelpful to wrap the html5ever output with our own root faux-element. UniqHtmlElement is changed to not do this. * Dependency specs relaxed in Cargo.toml to allow building with current Debian stable packaged crates. * I committed Cargo.lock at some point. Remove that. --- Cargo.lock | 443 ------------------------------ sharedmodel/Cargo.toml | 5 +- sharedmodel/src/html_html5ever.rs | 29 +- 3 files changed, 22 insertions(+), 455 deletions(-) delete mode 100644 Cargo.lock diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 2bb85bc..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,443 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -[[package]] -name = "bumpalo" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820" - -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "Could not get crate checksum" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "Could not get crate checksum" - -[[package]] -name = "futf" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" -dependencies = [ - "mac", - "new_debug_unreachable", -] - -[[package]] -name = "getrandom" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "Could not get crate checksum" -dependencies = [ - "cfg-if 0.1.10", - "libc", -] - -[[package]] -name = "html5ever" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" -dependencies = [ - "log", - "mac", - "markup5ever", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "itoa" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" - -[[package]] -name = "js-sys" -version = "0.3.55" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.138" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" - -[[package]] -name = "log" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "Could not get crate checksum" -dependencies = [ - "cfg-if 0.1.10", -] - -[[package]] -name = "mac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" - -[[package]] -name = "markup5ever" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab" -dependencies = [ - "log", - "phf", - "phf_codegen", - "serde", - "serde_derive", - "serde_json", - "string_cache", - "string_cache_codegen", - "tendril", -] - -[[package]] -name = "new_debug_unreachable" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cdc457076c78ab54d5e0d6fa7c47981757f1e34dc39ff92787f217dede586c4" -dependencies = [ - "unreachable", -] - -[[package]] -name = "phf" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "Could not get crate checksum" -dependencies = [ - "phf_shared", - "rand", -] - -[[package]] -name = "phf_shared" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" -dependencies = [ - "siphasher", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "Could not get crate checksum" - -[[package]] -name = "precomputed-hash" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" - -[[package]] -name = "proc-macro2" -version = "1.0.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "Could not get crate checksum" -dependencies = [ - "getrandom", - "libc", - "rand_chacha", - "rand_core", - "rand_hc", - "rand_pcg", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core", -] - -[[package]] -name = "rand_pcg" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" -dependencies = [ - "rand_core", -] - -[[package]] -name = "ryu" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" - -[[package]] -name = "serde" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36df6ac6412072f67cf767ebbde4133a5b2e88e76dc6187fa7104cd16f783399" - -[[package]] -name = "serde_derive" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e549e3abf4fb8621bd1609f11dfc9f5e50320802273b12f3811a67e6716ea6c" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f72eb2a68a7dc3f9a691bfda9305a1c017a6215e5a4545c258500d2099a37c2" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "sharedmodel" -version = "0.1.0" -dependencies = [ - "html5ever", - "string_cache", - "uuid", -] - -[[package]] -name = "sharedmodel_wasm" -version = "0.1.0" -dependencies = [ - "js-sys", - "sharedmodel", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "siphasher" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83da420ee8d1a89e640d0948c646c1c088758d3a3c538f943bfa97bdac17929d" - -[[package]] -name = "string_cache" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2940c75beb4e3bf3a494cef919a747a2cb81e52571e212bfbd185074add7208a" -dependencies = [ - "lazy_static", - "new_debug_unreachable", - "phf_shared", - "precomputed-hash", - "serde", -] - -[[package]] -name = "string_cache_codegen" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" -dependencies = [ - "phf_generator", - "phf_shared", - "proc-macro2", - "quote", -] - -[[package]] -name = "syn" -version = "1.0.103" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "Could not get crate checksum" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "tendril" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de21546595a0873061940d994bbbc5c35f024ae4fd61ec5c5b159115684f508" -dependencies = [ - "futf", - "mac", - "utf-8", -] - -[[package]] -name = "unicode-ident" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" - -[[package]] -name = "unreachable" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -dependencies = [ - "void", -] - -[[package]] -name = "utf-8" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" - -[[package]] -name = "uuid" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "Could not get crate checksum" - -[[package]] -name = "void" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" - -[[package]] -name = "wasm-bindgen" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" -dependencies = [ - "cfg-if 1.0.0", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" - -[[package]] -name = "web-sys" -version = "0.3.55" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] diff --git a/sharedmodel/Cargo.toml b/sharedmodel/Cargo.toml index 7303059..fb0b7fd 100644 --- a/sharedmodel/Cargo.toml +++ b/sharedmodel/Cargo.toml @@ -18,7 +18,8 @@ edition = "2018" uuid = "0.8" html5ever = "0.25" string_cache = "0.8" -anyhow = "1.0.71" +anyhow = "^1.0.31" # no need for Unicode support -fancy-regex = { version = "0.11.0", default-features = false, features = ["perf"] } +#fancy-regex = { version = "^0.7.0", default-features = false, features = ["perf"] } +fancy-regex = { version = "^0.7.0", default-features = false, features = [] } diff --git a/sharedmodel/src/html_html5ever.rs b/sharedmodel/src/html_html5ever.rs index 9afc890..f457339 100644 --- a/sharedmodel/src/html_html5ever.rs +++ b/sharedmodel/src/html_html5ever.rs @@ -33,6 +33,21 @@ impl HtmlElementInner<'_> { let contents = RefCell::new(Vec::new()); HtmlElementInner { name, parent, contents } } + + /* unpleasantly, this does empty self's child list even though it only takes + * an immutable reference to self. It seems that only an immutable reference + * is available where to_unique is called by RcHtmlElement::finish. I'm not + * sure how to fix this */ + fn to_unique(self) -> UniqHtmlElement { + let name = self.name.local.clone(); + let contents = self.contents.borrow_mut().drain(..).map(|node| { + match node { + HtmlNode::Text(s) => HtmlNode::Text(s), + HtmlNode::Element(e) => HtmlNode::Element(Rc::try_unwrap(e.0).map_err(|_|()).expect("only one strong reference to element").to_unique()) + } + }).collect(); + UniqHtmlElement { name, contents } + } } //#[derive(Clone, Debug)] @@ -57,16 +72,10 @@ impl<'a> TreeSink for RcHtmlElement<'a> { type Handle = Self; type Output = UniqHtmlElement; fn finish(self) -> UniqHtmlElement { - // let HtmlElementInner { name, parent, contents } = Rc::try_unwrap(self.0).map_err(|_| ()).expect("no refs left around"); - let HtmlElementInner { ref name, ref contents, .. } = *self.0; - let name = name.local.clone(); - let contents = contents.borrow_mut().drain(..).map(|node| { - match node { - HtmlNode::Text(s) => HtmlNode::Text(s), - HtmlNode::Element(e) => HtmlNode::Element(e.finish()) - } - }).collect(); - UniqHtmlElement { name, contents } + match (*self.0).contents.borrow_mut().pop().expect("root should have a child node") { + HtmlNode::Element(e) => Rc::try_unwrap(e.0).map_err(|_|()).expect("only one strong reference to root element").to_unique(), + _ => panic!("child node should be an element") + } } fn parse_error(&mut self, msg: Cow<'static, str>) { -- 2.30.2