Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt at getting <pre> to not parse its inner contents, similar to <script> #582

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
use flake
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ target
.vscode
Cargo.lock
*.racertmp
.direnv
91 changes: 91 additions & 0 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# please read flake introduction here:
# https://fasterthanli.me/series/building-a-rust-service-with-nix/part-10#a-flake-with-a-dev-shell
{
  description = "The fairsync importer prototype flake";

  inputs = {
    # Declare every input explicitly instead of relying on the implicit
    # flake-registry fallback, so all of them are pinned in flake.lock.
    nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
    flake-utils.url = "github:numtide/flake-utils";
    rust-overlay = {
      url = "github:oxalica/rust-overlay";
      # reuse our nixpkgs instead of locking a second copy
      inputs.nixpkgs.follows = "nixpkgs";
    };
  };

  outputs =
    { self, nixpkgs, flake-utils, rust-overlay }:
    flake-utils.lib.eachDefaultSystem
      (system:
        let
          overlays = [ (import rust-overlay) ];
          pkgs = import nixpkgs {
            inherit system overlays;
          };
          # Toolchain version comes from rust-toolchain.toml so rustup users
          # and nix users stay on the same compiler.
          rust = pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
          # Platform-specific native dependencies.
          # NOTE(review): this binding is currently unused — it probably
          # belongs in devShells.default.buildInputs; confirm with the author.
          platform_packages =
            if pkgs.stdenv.isLinux then
              with pkgs; [ ]
            else if pkgs.stdenv.isDarwin then
              with pkgs.darwin.apple_sdk.frameworks; [
                CoreFoundation
                Security
                SystemConfiguration
              ]
            else
              throw "unsupported platform";
        in
        with pkgs;
        # `rec` is needed: devShells.default refers to `trunk` below.
        rec {
          trunk = pkgs.callPackage ./trunk.nix {
            inherit (darwin.apple_sdk.frameworks) CoreServices Security SystemConfiguration;
          };
          #leptosfmt = pkgs.callPackage ./leptosfmt.nix {};

          devShells.default = mkShell {
            buildInputs = [
              rust
              wasm-pack
              firefox
              trunk # required to bundle the frontend
              binaryen # required to minify WASM files with wasm-opt
              git
              pkg-config
              just # task runner
              #nodejs # required to install tailwind plugins
            ];
          };
        }
      );
}
1 change: 1 addition & 0 deletions html5ever/src/tokenizer/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ pub enum Token {
/// Directive returned by a `TokenSink` after it processes a token; tells the
/// tokenizer how to continue.
pub enum TokenSinkResult<Handle> {
    /// Keep tokenizing normally.
    Continue,
    /// A script element finished; the tokenizer returns control to the caller
    /// (carrying the script node's handle) so the script can be run before
    /// tokenization resumes.
    Script(Handle),
    /// Added by this change: a `<pre>` element finished and control is handed
    /// back with its node handle, mirroring `Script` — TODO(review) confirm
    /// the intended caller-side handling for `<pre>` contents.
    PreData(Handle),
    /// Switch the tokenizer into the PLAINTEXT state.
    Plaintext,
    /// Switch the tokenizer into the given raw-data state
    /// (RCDATA, RAWTEXT, script data, …).
    RawData(states::RawKind),
}
Expand Down
65 changes: 54 additions & 11 deletions html5ever/src/tokenizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,15 @@ pub use self::interface::{CommentToken, DoctypeToken, TagToken, Token};
pub use self::interface::{Doctype, EndTag, StartTag, Tag, TagKind};
pub use self::interface::{TokenSink, TokenSinkResult};

use self::char_ref::{CharRef, CharRefTokenizer};
use self::states::{DoctypeIdKind, Public, System};
use self::states::{DoubleEscaped, Escaped};
use self::states::{DoubleQuoted, SingleQuoted, Unquoted};
use self::states::{Rawtext, Rcdata, ScriptData, ScriptDataEscaped};

use self::char_ref::{CharRef, CharRefTokenizer};
use self::states::{PreData, Rawtext, Rcdata, ScriptData, ScriptDataEscaped};

use crate::util::str::lower_ascii_letter;

use log::{debug, trace};
use log::debug;
use mac::format_if;
use markup5ever::{namespace_url, ns, small_char_set};
use std::borrow::Cow::{self, Borrowed};
Expand All @@ -43,13 +42,15 @@ pub enum ProcessResult<Handle> {
Continue,
Suspend,
Script(Handle),
PreData(Handle),
}

/// Outcome of driving the tokenizer over the current input.
#[must_use]
#[derive(Debug)]
pub enum TokenizerResult<Handle> {
    /// The tokenizer stopped without needing caller intervention
    /// (input exhausted or suspended).
    Done,
    /// Tokenization paused because a script element completed; the caller
    /// must handle the script node before feeding more input.
    Script(Handle),
    /// Added by this change: tokenization paused for a completed `<pre>`
    /// element, mirroring `Script` — TODO(review) confirm what the caller
    /// is expected to do with the handle.
    PreData(Handle),
}

fn option_push(opt_str: &mut Option<StrTendril>, c: char) {
Expand Down Expand Up @@ -277,7 +278,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
self.emit_error(Cow::Owned(msg));
}

trace!("got character {}", c);
println!("got character {}", c);
self.current_char.set(c);
Some(c)
}
Expand Down Expand Up @@ -305,7 +306,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}

let d = input.pop_except_from(set);
trace!("got characters {:?}", d);
println!("got characters {:?}", d);
match d {
Some(FromSet(c)) => self.get_preprocessed_char(c, input).map(FromSet),

Expand Down Expand Up @@ -363,15 +364,27 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
match run {
ProcessResult::Continue => (),
ProcessResult::Suspend => break,
ProcessResult::Script(node) => return TokenizerResult::Script(node),
ProcessResult::Script(node) => return { TokenizerResult::Script(node) },
ProcessResult::PreData(node) => return { TokenizerResult::PreData(node) },
}
}
} else {
loop {
match self.step(input) {
ProcessResult::Continue => (),
ProcessResult::Suspend => break,
ProcessResult::Script(node) => return TokenizerResult::Script(node),
ProcessResult::Script(node) => {
return {
println!(" TokenizerResult::Script(node)");
TokenizerResult::Script(node)
}
},
ProcessResult::PreData(node) => {
return {
println!(" TokenizerResult::PreData(node)");
TokenizerResult::PreData(node)
}
},
}
}
}
Expand Down Expand Up @@ -412,6 +425,8 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}

fn emit_current_tag(&self) -> ProcessResult<Sink::Handle> {
println!("emit_current_tag");
// println!("Custom backtrace: {}", Backtrace::capture());
self.finish_attribute();

let name = LocalName::from(&**self.current_tag_name.borrow());
Expand All @@ -438,16 +453,24 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
attrs: std::mem::take(&mut self.current_tag_attrs.borrow_mut()),
});

// qknight
match self.process_token(token) {
TokenSinkResult::Continue => ProcessResult::Continue,
TokenSinkResult::Plaintext => {
self.state.set(states::Plaintext);
ProcessResult::Continue
},
TokenSinkResult::Script(node) => {
println!("match self.process_token(token) for script");
self.state.set(states::Data);
ProcessResult::Script(node)
},
// maybe i could have used RawData
TokenSinkResult::PreData(node) => {
println!("match self.process_token(token) for pre_data");
self.state.set(states::Data);
ProcessResult::PreData(node)
},
TokenSinkResult::RawData(kind) => {
self.state.set(states::RawData(kind));
ProcessResult::Continue
Expand Down Expand Up @@ -704,7 +727,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
return self.step_char_ref_tokenizer(input);
}

trace!("processing in state {:?}", self.state);
println!("processing 1n state {:?}", self.state);
match self.state.get() {
//§ data-state
states::Data => loop {
Expand Down Expand Up @@ -759,6 +782,19 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
}
},

// //§ predata-state
states::RawData(PreData) => {
println!("====== states::RawData(PreData)");
loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '<' '\n')) {
FromSet('\0') => go!(self: error; emit '\u{fffd}'),
FromSet('<') => go!(self: to RawLessThanSign PreData),
FromSet(c) => go!(self: emit c),
NotFromSet(b) => self.emit_chars(b),
}
}
},

//§ script-data-double-escaped-state
states::RawData(ScriptDataEscaped(DoubleEscaped)) => loop {
match pop_except_from!(self, input, small_char_set!('\r' '\0' '-' '<' '\n')) {
Expand Down Expand Up @@ -788,7 +824,9 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
'/' => go!(self: to EndTagOpen),
'?' => go!(self: error; clear_comment; reconsume BogusComment),
c => match lower_ascii_letter(c) {
Some(cl) => go!(self: create_tag StartTag cl; to TagName),
Some(cl) => {
go!(self: create_tag StartTag cl; to TagName)
},
None => go!(self: error; emit '<'; reconsume Data),
},
}
Expand All @@ -797,7 +835,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
//§ end-tag-open-state
states::EndTagOpen => loop {
match get_char!(self, input) {
'>' => go!(self: error; to Data),
'>' => {
//println!("tttt {}", cl);
go!(self: error; to Data)
},
c => match lower_ascii_letter(c) {
Some(cl) => go!(self: create_tag EndTag cl; to TagName),
None => go!(self: error; clear_comment; reconsume BogusComment),
Expand Down Expand Up @@ -1455,6 +1496,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
ProcessResult::Continue => (),
ProcessResult::Suspend => break,
ProcessResult::Script(_) => unreachable!(),
ProcessResult::PreData(_) => unreachable!(),
}
}

Expand Down Expand Up @@ -1498,6 +1540,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
| states::RawData(Rcdata)
| states::RawData(Rawtext)
| states::RawData(ScriptData)
| states::RawData(PreData)
| states::Plaintext => go!(self: eof),

states::TagName
Expand Down
1 change: 1 addition & 0 deletions html5ever/src/tokenizer/states.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ pub enum RawKind {
Rcdata,
Rawtext,
ScriptData,
PreData,
ScriptDataEscaped(ScriptEscapeKind),
}

Expand Down
Loading