Skip to content

Commit 80121b9

Browse files
Or Ganymrobinson
Or Gany
authored and committed
Adding comments for arena
1 parent 9b94335 commit 80121b9

File tree

5 files changed

+51
-11
lines changed

5 files changed

+51
-11
lines changed

html5ever/examples/arena.rs

+26-9
Original file line numberDiff line numberDiff line change
@@ -19,36 +19,32 @@ use std::collections::HashSet;
1919
use std::io::{self, Read};
2020
use std::ptr;
2121

22-
fn main() {
23-
let mut bytes = Vec::new();
24-
io::stdin().read_to_end(&mut bytes).unwrap();
25-
let arena = typed_arena::Arena::new();
26-
html5ever_parse_slice_into_arena(&bytes, &arena);
27-
}
28-
22+
/// By using our Sink type, the arena is filled with parsed HTML.
2923
fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
3024
let sink = Sink {
3125
arena,
3226
document: arena.alloc(Node::new(NodeData::Document)),
3327
quirks_mode: QuirksMode::NoQuirks,
3428
};
29+
3530
parse_document(sink, Default::default())
3631
.from_utf8()
3732
.one(bytes)
3833
}
3934

4035
type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;
41-
4236
type Ref<'arena> = &'arena Node<'arena>;
43-
4437
type Link<'arena> = Cell<Option<Ref<'arena>>>;
4538

39+
/// The Sink struct determines what happens to the data that comes out of the HTML parsing
40+
/// unit (TreeBuilder in our case).
4641
struct Sink<'arena> {
4742
arena: Arena<'arena>,
4843
document: Ref<'arena>,
4944
quirks_mode: QuirksMode,
5045
}
5146

47+
/// DOM node which contains links to other nodes in the tree.
5248
pub struct Node<'arena> {
5349
parent: Link<'arena>,
5450
next_sibling: Link<'arena>,
@@ -58,6 +54,7 @@ pub struct Node<'arena> {
5854
data: NodeData<'arena>,
5955
}
6056

57+
/// HTML node data, which can be an element, a comment, a string, a DOCTYPE, etc.
6158
pub enum NodeData<'arena> {
6259
Document,
6360
Doctype {
@@ -178,6 +175,11 @@ impl<'arena> Sink<'arena> {
178175
}
179176
}
180177

178+
/// By implementing the TreeSink trait we determine how the data from the tree building step
179+
/// is processed. In our case, our data is allocated in the arena and added to the Node data
180+
/// structure.
181+
///
182+
/// For a deeper understanding of each function, see the TreeSink declaration.
181183
impl<'arena> TreeSink for Sink<'arena> {
182184
type Handle = Ref<'arena>;
183185
type Output = Ref<'arena>;
@@ -333,3 +335,18 @@ impl<'arena> TreeSink for Sink<'arena> {
333335
}
334336
}
335337
}
338+
339+
/// In this example an "arena" is created and filled with the DOM nodes.
340+
/// "Arena" is a type of allocation in which a block of memory is allocated
341+
/// and later filled with data, DOM nodes in this case. When the arena is deallocated
342+
/// it is destroyed with all of its items.
343+
///
344+
/// Further info about arena: https://docs.rs/typed-arena/latest/typed_arena/
345+
fn main() {
346+
// Read HTML from the standard input
347+
let mut bytes = Vec::new();
348+
io::stdin().read_to_end(&mut bytes).unwrap();
349+
350+
let arena = typed_arena::Arena::new();
351+
html5ever_parse_slice_into_arena(&bytes, &arena);
352+
}

html5ever/examples/noop-tokenize.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,27 @@ use std::io;
1616
use html5ever::tendril::*;
1717
use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};
1818

19+
/// In our case, the sink only contains a vector of tokens.
1920
struct Sink(Vec<Token>);
2021

2122
impl TokenSink for Sink {
2223
type Handle = ();
2324

25+
/// Each processed token will be handled by this method
2426
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
25-
// Don't use the token, but make sure we don't get
26-
// optimized out entirely.
2727
self.0.push(token);
2828
TokenSinkResult::Continue
2929
}
3030
}
3131

32+
/// In this example we implement the TokenSink trait which lets us implement how each
33+
/// parsed token is treated. In our example we take each token and insert it into a vector.
3234
fn main() {
35+
// Read HTML from standard input
3336
let mut chunk = ByteTendril::new();
3437
io::stdin().read_to_tendril(&mut chunk).unwrap();
38+
39+
// Create a buffer queue for the tokenizer
3540
let mut input = BufferQueue::default();
3641
input.push_back(chunk.try_reinterpret().unwrap());
3742

html5ever/examples/noop-tree-builder.rs

+8
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ impl Sink {
3232
}
3333
}
3434

35+
/// By implementing the TreeSink trait we determine how the data from the tree building step
36+
/// is processed. In this case the DOM elements are written into the "names" hashmap.
37+
///
38+
/// For a deeper understanding of each function, see the TreeSink declaration.
3539
impl TreeSink for Sink {
3640
type Handle = usize;
3741
type Output = Self;
@@ -98,11 +102,15 @@ impl TreeSink for Sink {
98102
fn mark_script_already_started(&mut self, _node: &usize) {}
99103
}
100104

105+
/// In this example we implement the TreeSink trait, which takes each parsed element and inserts
106+
/// it into a hashmap, while each element is given a numeric id.
101107
fn main() {
102108
let sink = Sink {
103109
next_id: 1,
104110
names: HashMap::new(),
105111
};
112+
113+
// Read HTML from the standard input and parse it
106114
let stdin = io::stdin();
107115
parse_document(sink, Default::default())
108116
.from_utf8()

html5ever/examples/print-tree-actions.rs

+3
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ impl TreeSink for Sink {
158158
}
159159
}
160160

161+
/// Same example as the "noop-tree-builder", but this time every function implemented in our
162+
/// Sink object prints a log, so it's easier to get an understanding of when each function is
163+
/// called.
161164
fn main() {
162165
let sink = Sink {
163166
next_id: 1,

html5ever/examples/tokenize.rs

+7
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,16 @@ impl TokenSink for TokenPrinter {
8181
}
8282
}
8383

84+
/// In this example we implement the TokenSink trait in such a way that each token is printed.
85+
/// If there's an error while processing a token, it is printed as well.
8486
fn main() {
8587
let mut sink = TokenPrinter { in_char_run: false };
88+
89+
// Read HTML from standard input
8690
let mut chunk = ByteTendril::new();
8791
io::stdin().read_to_tendril(&mut chunk).unwrap();
92+
93+
// Create a buffer queue for the tokenizer
8894
let mut input = BufferQueue::default();
8995
input.push_back(chunk.try_reinterpret().unwrap());
9096

@@ -96,6 +102,7 @@ fn main() {
96102
},
97103
);
98104
let _ = tok.feed(&mut input);
105+
99106
assert!(input.is_empty());
100107
tok.end();
101108
sink.is_char(false);

0 commit comments

Comments
 (0)