@@ -19,36 +19,32 @@ use std::collections::HashSet;
19
19
use std:: io:: { self , Read } ;
20
20
use std:: ptr;
21
21
22
- fn main ( ) {
23
- let mut bytes = Vec :: new ( ) ;
24
- io:: stdin ( ) . read_to_end ( & mut bytes) . unwrap ( ) ;
25
- let arena = typed_arena:: Arena :: new ( ) ;
26
- html5ever_parse_slice_into_arena ( & bytes, & arena) ;
27
- }
28
-
22
+ /// Parses `bytes` as an HTML document through our `Sink`, so that the parsed nodes are allocated in `arena`; returns the sink's output handle (`Ref<'a>`).
29
23
fn html5ever_parse_slice_into_arena < ' a > ( bytes : & [ u8 ] , arena : Arena < ' a > ) -> Ref < ' a > {
30
24
let sink = Sink {
31
25
arena,
32
26
document : arena. alloc ( Node :: new ( NodeData :: Document ) ) , // the `Document` node is allocated up front, before parsing starts
33
27
quirks_mode : QuirksMode :: NoQuirks , // initial value; NOTE(review): presumably updated by the TreeSink impl during parsing — confirm
34
28
} ;
29
+
35
30
parse_document ( sink, Default :: default ( ) ) // default parser options
36
31
. from_utf8 ( ) // input is raw bytes rather than `str`; NOTE(review): presumably decoded as UTF-8 by the parser — confirm against the html5ever/tendril docs
37
32
. one ( bytes) // hand the whole input to the parser in a single call
38
33
}
39
34
40
35
type Arena < ' arena > = & ' arena typed_arena:: Arena < Node < ' arena > > ; // shared reference to the typed arena that stores `Node` values
41
-
42
36
type Ref < ' arena > = & ' arena Node < ' arena > ; // reference to a `Node`, valid for the `'arena` lifetime
43
-
44
37
type Link < ' arena > = Cell < Option < Ref < ' arena > > > ; // optional node reference wrapped in a `Cell`, so it can be replaced through a shared reference
45
38
39
+ /// The `Sink` struct decides what happens to the data produced by the HTML parsing
40
+ /// unit (the TreeBuilder in our case). It holds the arena, the document node and the quirks mode.
46
41
struct Sink < ' arena > {
47
42
arena : Arena < ' arena > , // arena in which nodes are allocated
48
43
document : Ref < ' arena > , // the `Document` node, allocated in the arena when the sink is built
49
44
quirks_mode : QuirksMode , // initialised to `QuirksMode::NoQuirks` by html5ever_parse_slice_into_arena
50
45
}
51
46
47
+ /// DOM node which contains links to other nodes in the tree.
52
48
pub struct Node < ' arena > {
53
49
parent : Link < ' arena > ,
54
50
next_sibling : Link < ' arena > ,
@@ -58,6 +54,7 @@ pub struct Node<'arena> {
58
54
data : NodeData < ' arena > ,
59
55
}
60
56
57
+ /// HTML node data which can be an element, a comment, a string, a DOCTYPE, etc...
61
58
pub enum NodeData < ' arena > {
62
59
Document ,
63
60
Doctype {
@@ -178,6 +175,11 @@ impl<'arena> Sink<'arena> {
178
175
}
179
176
}
180
177
178
+ /// By implementing the TreeSink trait we determine how the data from the tree building step
179
+ /// is processed. In our case, our data is allocated in the arena and added to the Node data
180
+ /// structure.
181
+ ///
182
+ /// For a deeper understanding of each function, see the TreeSink declaration.
181
183
impl < ' arena > TreeSink for Sink < ' arena > {
182
184
type Handle = Ref < ' arena > ;
183
185
type Output = Ref < ' arena > ;
@@ -333,3 +335,18 @@ impl<'arena> TreeSink for Sink<'arena> {
333
335
}
334
336
}
335
337
}
338
+
339
+ /// Entry point of the example: an "arena" is created and filled with the DOM nodes parsed from standard input.
340
+ /// An "arena" is a type of allocation in which a block of memory is allocated
341
+ /// and later filled with data (DOM nodes in this case). When the arena is deallocated,
342
+ /// all of its items are destroyed along with it.
343
+ ///
344
+ /// Further info about arenas: https://docs.rs/typed-arena/latest/typed_arena/
345
+ fn main ( ) {
346
+ // Read the whole HTML input from standard input into memory
347
+ let mut bytes = Vec :: new ( ) ;
348
+ io:: stdin ( ) . read_to_end ( & mut bytes) . unwrap ( ) ; // `unwrap` panics if reading stdin fails
349
+
350
+ let arena = typed_arena:: Arena :: new ( ) ; // created here and dropped when `main` returns
351
+ html5ever_parse_slice_into_arena ( & bytes, & arena) ; // the returned node reference is discarded; the example only builds the tree
352
+ }
0 commit comments