diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php
index 1184403f7b..b08856e838 100644
--- a/packages/playground/data-liberation/bootstrap.php
+++ b/packages/playground/data-liberation/bootstrap.php
@@ -61,6 +61,7 @@
require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php';
+require_once __DIR__ . '/src/entity-readers/WP_EPub_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_WXR_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_Directory_Tree_Entity_Reader.php';
diff --git a/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php b/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php
index 329e75bc39..6aceeb9c53 100644
--- a/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php
+++ b/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php
@@ -28,31 +28,29 @@ class WP_HTML_To_Blocks implements WP_Block_Markup_Converter {
private $state = self::STATE_READY;
private $block_stack = array();
- private $html;
+ private $markup_processor;
private $ignore_text = false;
private $in_ephemeral_paragraph = false;
private $block_markup = '';
private $metadata = array();
+ private $last_error = null;
- public function __construct( $html ) {
- $this->html = WP_HTML_Processor::create_fragment( $html );
+ public function __construct( $markup_processor ) {
+ $this->markup_processor = $markup_processor;
}
- /**
- * @inheritDoc
- */
public function convert() {
if ( self::STATE_READY !== $this->state ) {
return false;
}
- while ( $this->html->next_token() ) {
- switch ( $this->html->get_token_type() ) {
+ while ( $this->markup_processor->next_token() ) {
+ switch ( $this->markup_processor->get_token_type() ) {
case '#text':
if ( $this->ignore_text ) {
break;
}
- $this->append_html( htmlspecialchars( $this->html->get_modifiable_text() ) );
+ $this->append_rich_text( htmlspecialchars( $this->markup_processor->get_modifiable_text() ) );
break;
case '#tag':
$this->handle_tag();
@@ -60,13 +58,16 @@ public function convert() {
}
}
+ if ( $this->markup_processor->get_last_error() ) {
+ $this->last_error = $this->markup_processor->get_last_error();
+ return false;
+ }
+
$this->close_ephemeral_paragraph();
+
return true;
}
- /**
- * @inheritDoc
- */
public function get_first_meta_value( $key ) {
if ( ! array_key_exists( $key, $this->metadata ) ) {
return null;
@@ -74,231 +75,204 @@ public function get_first_meta_value( $key ) {
return $this->metadata[ $key ][0];
}
- /**
- * @inheritDoc
- */
public function get_all_metadata() {
return $this->metadata;
}
- /**
- * @inheritDoc
- */
public function get_block_markup() {
return $this->block_markup;
}
- /**
- * Converts the currently matched HTML tag to block markup
- * or metadata.
- */
private function handle_tag() {
- $html = $this->html;
- $tag = $html->get_tag();
+ $html = $this->markup_processor;
+ $tag = strtoupper( $html->get_tag() );
$tag_lowercase = strtolower( $tag );
- $is_opener = ! $html->is_tag_closer() && $html->expects_closer();
- $is_closer = $html->is_tag_closer();
- $is_void_tag = ! $html->expects_closer();
- $prefix = (
- $is_void_tag ? '' : (
- $is_closer ? '-' : '+'
- )
- );
- $event = $prefix . $tag;
- switch ( $event ) {
- case 'META':
- $key = $html->get_attribute( 'name' );
- $value = $html->get_attribute( 'content' );
- if ( ! array_key_exists( $key, $this->metadata ) ) {
- $this->metadata[ $key ] = array();
- }
- $this->metadata[ $key ][] = $value;
- break;
- case 'IMG':
- $template = new \WP_HTML_Tag_Processor( '' );
- $template->next_tag();
- foreach ( array( 'alt', 'title', 'src' ) as $attr ) {
- if ( $html->get_attribute( $attr ) ) {
- $template->set_attribute( $attr, $html->get_attribute( $attr ) );
+ $is_void_tag = ! $html->expects_closer() && ! $html->is_tag_closer();
+ if ( $is_void_tag ) {
+ switch ( $tag ) {
+ case 'META':
+ $key = $html->get_attribute( 'name' );
+ $value = $html->get_attribute( 'content' );
+ if ( ! array_key_exists( $key, $this->metadata ) ) {
+ $this->metadata[ $key ] = array();
}
- }
- $this->append_html( $template->get_updated_html() );
- break;
- case 'INPUT':
- // Insert the input tag as HTML blocks.
- $this->push_block( 'html' );
- $template = new \WP_HTML_Tag_Processor( '' );
- $template->next_tag();
- $attrs = $this->html->get_attribute_names_with_prefix( '' );
- foreach ( $attrs as $attr ) {
- $template->set_attribute( $attr, $this->html->get_attribute( $attr ) );
- }
- $this->append_html( htmlspecialchars( $template->get_updated_html() ) );
- $this->pop_block();
- break;
- case 'HR':
- $this->push_block( 'separator' );
- $this->block_markup .= '
element? Let's convert it into a formatting element.
- * - A block element? Let's convert it into a block.
- */
- if ( $this->is_at_inline_code_element() ) {
- $this->append_html( '<' . $tag_lowercase . '>' );
- } else {
- $this->push_block( 'code' );
- $this->block_markup .= '<' . $tag_lowercase . ' class="wp-block-code">';
- }
- break;
- case '-CODE':
- $this->block_markup .= '' . $tag_lowercase . '>';
- if ( ! $this->is_at_inline_code_element() ) {
+ // Block elements
+ case 'UL':
+ case 'OL':
+ $this->block_markup .= '
'; - break; - case '-P': - $this->block_markup .= '
'; - $this->pop_block(); - break; + break; - case '+H1': - case '+H2': - case '+H3': - case '+H4': - case '+H5': - case '+H6': - $this->push_block( - 'heading', - array( - 'level' => (int) $tag[1] ? (int) $tag[1] : 1, - ) - ); - $this->block_markup .= 'Last week, WordPress 6.8 was released.
HTML; - $reader = new WP_HTML_Entity_Reader( $html, 1 ); + $reader = new WP_HTML_Entity_Reader( new WP_HTML_Processor( $html ), 1 ); $entities = []; while ( $reader->next_entity() ) { $data = $reader->get_entity()->get_data(); diff --git a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php index 41d6ba8ae8..d3daef742e 100644 --- a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php +++ b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php @@ -16,7 +16,7 @@ public function test_metadata_extraction() {Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library.
HTML; - $converter = new WP_HTML_To_Blocks( $html ); + $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $html ) ); $converter->convert( $html ); $metadata = $converter->get_all_metadata(); $expected_metadata = [ @@ -35,7 +35,7 @@ public function test_metadata_extraction() { * @dataProvider provider_test_conversion */ public function test_html_to_blocks_conversion( $html, $expected ) { - $converter = new WP_HTML_To_Blocks( $html ); + $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $html ) ); $converter->convert( $html ); $blocks = $converter->get_block_markup(); @@ -89,16 +89,12 @@ public function provider_test_conversion() { ], 'Formatted text' => [ 'html' => 'Bold and Italic
', - 'expected' => "Bold and Italic
" + 'expected' => "Bold and Italic
" ], 'A blockquote' => [ 'html' => 'A simple blockquote', 'expected' => "
A simple blockquote" ], - 'A an tag' => [ - 'html' => '', - 'expected' => "<input type="text" value="A simple input"> " - ], 'A table' => [ 'html' => <<