From 1c66dd0f1efe09fc1ebd9b83c5054e20ad79607b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Tue, 17 Dec 2024 13:36:04 +0100 Subject: [PATCH 01/26] [Data Liberation] Add HTML to Blocks converter Adds a basic WP_HTML_To_Blocks class that accepts HTML and outputs block markup. It only considers the markup and won't consider any visual changes introduced via CSS or JavaScript. A part of #1894 ## Example ```html $html = <<

Hello world!

HTML; $converter = new WP_HTML_To_Blocks( $html ); $converter->convert(); var_dump( $converter->get_all_metadata() ); /* * array( 'post_title' => array( 'My first post' ) ) */ var_dump( $converter->get_block_markup() ); /* * *

Hello world!

* */ ``` ## Testing instructions This PR mostly adds new code. Just confirm the unit tests pass in CI. --- .../data-liberation/blueprints-library | 2 +- .../playground/data-liberation/bootstrap.php | 5 + .../playground/data-liberation/phpunit.xml | 2 + .../WP_Block_Markup_Converter.php | 8 + .../src/block-markup/WP_HTML_To_Blocks.php | 395 + .../src/entity-readers/WP_Entity_Reader.php | 70 + .../entity-readers/WP_HTML_Entity_Reader.php | 95 + .../src/import/WP_Import_Utils.php | 96 + .../class-wp-html-processor.php | 10 +- .../tests/WPHTMLEntityReaderTests.php | 75 + .../tests/WPHTMLToBlocksTests.php | 141 + .../html-to-blocks/excerpt.input.html | 189 + .../html-to-blocks/excerpt.output.html | 9251 +++++++++++++++++ 13 files changed, 10336 insertions(+), 3 deletions(-) create mode 100644 packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Converter.php create mode 100644 packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php create mode 100644 packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php create mode 100644 packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php create mode 100644 packages/playground/data-liberation/src/import/WP_Import_Utils.php create mode 100644 packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php create mode 100644 packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php create mode 100644 packages/playground/data-liberation/tests/fixtures/html-to-blocks/excerpt.input.html create mode 100644 packages/playground/data-liberation/tests/fixtures/html-to-blocks/excerpt.output.html diff --git a/packages/playground/data-liberation/blueprints-library b/packages/playground/data-liberation/blueprints-library index 966e692625..b52a93ce17 160000 --- a/packages/playground/data-liberation/blueprints-library +++ b/packages/playground/data-liberation/blueprints-library @@ -1 +1 @@ -Subproject commit 966e6926256dc56c8473c6257d0d474be0f20811 
+Subproject commit b52a93ce17562a1964fb27df770792fe165b217b diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index 9c38ff0a6e..978acd3942 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -44,14 +44,17 @@ require_once __DIR__ . '/src/wordpress-core-html-api/html5-named-character-references.php'; } +require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Converter.php'; require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Processor.php'; require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Url_Processor.php'; require_once __DIR__ . '/src/block-markup/WP_URL_In_Text_Processor.php'; +require_once __DIR__ . '/src/block-markup/WP_HTML_To_Blocks.php'; require_once __DIR__ . '/src/block-markup/WP_URL.php'; require_once __DIR__ . '/src/xml-api/WP_XML_Decoder.php'; require_once __DIR__ . '/src/xml-api/WP_XML_Processor.php'; require_once __DIR__ . '/src/wxr/WP_WXR_Reader.php'; +require_once __DIR__ . '/src/import/WP_Import_Utils.php'; require_once __DIR__ . '/src/import/WP_Block_Object.php'; require_once __DIR__ . '/src/import/WP_Entity_Importer.php'; require_once __DIR__ . '/src/import/WP_File_Visitor.php'; @@ -64,6 +67,8 @@ require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php'; require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php'; require_once __DIR__ . '/src/import/WP_Markdown_Importer.php'; +require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php'; +require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php'; require_once __DIR__ . 
'/src/utf8_decoder.php'; diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index 800b55f189..9646f33205 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -2,6 +2,8 @@ + tests/WPHTMLEntityReaderTests.php + tests/WPHTMLToBlocksTests.php tests/WPWXRReaderTests.php tests/WPRewriteUrlsTests.php tests/WPURLInTextProcessorTests.php diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Converter.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Converter.php new file mode 100644 index 0000000000..e3cd04b6de --- /dev/null +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Converter.php @@ -0,0 +1,8 @@ + + *

Hello world!

+ * + * Becomes: + * + * + *

Hello world!

+ * + * + * With the following metadata: + * + * array( + * 'post_title' => array( 'My first post' ), + * ) + */ +class WP_HTML_To_Blocks implements WP_Block_Markup_Converter { + const STATE_READY = 'STATE_READY'; + const STATE_COMPLETE = 'STATE_COMPLETE'; + + private $state = self::STATE_READY; + private $block_stack = array(); + private $html; + private $ignore_text = false; + private $in_ephemeral_paragraph = false; + private $block_markup = ''; + private $metadata = array(); + + public function __construct( $html ) { + $this->html = new \WP_HTML_Processor( $html ); + } + + public function convert() { + if ( self::STATE_READY !== $this->state ) { + return false; + } + + while ( $this->html->next_token() ) { + switch ( $this->html->get_token_type() ) { + case '#text': + if ( $this->ignore_text ) { + break; + } + $this->append_rich_text( htmlspecialchars( $this->html->get_modifiable_text() ) ); + break; + case '#tag': + $this->handle_tag(); + break; + } + } + + $this->close_ephemeral_paragraph(); + return true; + } + + public function get_meta_value( $key ) { + if ( ! array_key_exists( $key, $this->metadata ) ) { + return null; + } + return $this->metadata[ $key ][0]; + } + + public function get_all_metadata() { + return $this->metadata; + } + + public function get_block_markup() { + return $this->block_markup; + } + + private function handle_tag() { + $html = $this->html; + $tag = $html->get_tag(); + $tag_lowercase = strtolower( $tag ); + + $is_tag_opener = ! $html->is_tag_closer(); + if ( ! $html->expects_closer() ) { + switch ( $tag ) { + case 'META': + $key = $html->get_attribute( 'name' ); + $value = $html->get_attribute( 'content' ); + if ( ! 
array_key_exists( $key, $this->metadata ) ) { + $this->metadata[ $key ] = array(); + } + $this->metadata[ $key ][] = $value; + break; + case 'IMG': + $template = new \WP_HTML_Tag_Processor( '' ); + $template->next_tag(); + foreach ( array( 'alt', 'title', 'src' ) as $attr ) { + if ( $html->get_attribute( $attr ) ) { + $template->set_attribute( $attr, $html->get_attribute( $attr ) ); + } + } + /** + * + */ + $this->append_rich_text( $template->get_updated_html() ); + break; + default: + // @TODO: What to do with other void tags, e.g. ? + // Just insert an HTML block or what? + break; + } + } elseif ( $is_tag_opener ) { + switch ( $tag ) { + // Block elements + case 'SCRIPT': + $this->ignore_text = true; + break; + case 'UL': + case 'OL': + $this->push_block( 'list', array( 'ordered' => $tag === 'ol' ) ); + $this->block_markup .= ''; + $this->pop_block(); + break; + + case 'LI': + case 'BLOCKQUOTE': + case 'PRE': + case 'HR': + case 'P': + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + $this->block_markup .= ''; + $this->pop_block(); + break; + + case 'A': + $this->block_markup .= ''; + break; + + // Formats + default: + if ( $this->should_preserve_tag_in_rich_text( $tag ) ) { + $this->block_markup .= ''; + } + break; + } + } + } + + /** + * Checks whether the given tag is an inline formatting element + * that we want to preserve when parsing rich text. For example, + * tags are meaningful from the rich text perspective, but + *
tags are not. + * + * @param string $tag The tag to check. + * @return bool Whether the tag should be preserved in rich text. + */ + private function should_preserve_tag_in_rich_text( $tag ) { + return in_array( + $tag, + array( + 'B', + 'STRONG', + 'I', + 'U', + 'S', + 'SMALL', + 'SUP', + 'SUB', + 'MARK', + 'EM', + 'CITE', + 'DFN', + 'CODE', + 'KBD', + 'SAMP', + 'VAR', + ), + true + ); + } + + private function is_at_inline_code_element() { + $breadcrumbs = $this->html->get_breadcrumbs(); + foreach ( $breadcrumbs as $tag ) { + switch ( $tag ) { + case 'A': + case 'P': + case 'LI': + case 'TABLE': + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + return true; + } + } + return false; + } + + /** + * Appends a snippet of HTML to the block markup. + * Ensures given $html is a part of a block. If no block is + * currently open, it appends a new paragraph block. + * + * @param string $html The HTML snippet to append. + */ + private function append_rich_text( $html ) { + $html = trim( $html ); + if ( empty( $html ) ) { + return; + } + // Make sure two subsequent append_text() calls don't merge the text. + $html .= ' '; + $this->ensure_open_block(); + $this->block_markup .= $html; + } + + /** + * Pushes a new block onto the stack of open blocks and appends the block + * opener to the block markup. + * + * @param string $name The name of the block to push. + * @param array $attributes The attributes of the block to push. + */ + private function push_block( $name, $attributes = array() ) { + $this->close_ephemeral_paragraph(); + $block = new \WP_Block_Object( $name, $attributes ); + array_push( $this->block_stack, $block ); + $this->block_markup .= WP_Import_Utils::block_opener( $block->block_name, $block->attrs ) . "\n"; + } + + /** + * Pops the last block from the stack of open blocks and appends the block + * closer to the block markup. + * + * @return \WP_Block_Object The last block that was popped. 
+ */ + private function pop_block() { + if ( ! empty( $this->block_stack ) ) { + $popped = array_pop( $this->block_stack ); + $this->block_markup .= WP_Import_Utils::block_closer( $popped->block_name ) . "\n"; + return $popped; + } + } + + /** + * Ensures that a block is open. If no block is currently open, it appends + * a new, ephemeral paragraph block that will be automatically closed + * when the next block opens OR when the HTML ends. + */ + private function ensure_open_block() { + if ( empty( $this->block_stack ) && ! $this->in_ephemeral_paragraph ) { + $this->block_markup .= WP_Import_Utils::block_opener( 'paragraph' ) . "\n"; + $this->block_markup .= '

'; + $this->in_ephemeral_paragraph = true; + } + } + + /** + * Closes the ephemeral paragraph if it is currently open. + */ + private function close_ephemeral_paragraph() { + if ( $this->in_ephemeral_paragraph ) { + $this->block_markup .= '

'; + $this->block_markup .= WP_Import_Utils::block_closer( 'paragraph' ); + $this->in_ephemeral_paragraph = false; + } + } +} diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php new file mode 100644 index 0000000000..77261f4b35 --- /dev/null +++ b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php @@ -0,0 +1,70 @@ +get_entity() && ! $this->is_finished() && ! $this->get_last_error() ) { + $this->next(); + } + return $this->get_entity(); + } + + private $last_next_result = null; + public function next(): void { + // @TODO: Don't keep track of this. Just make sure the next_entity() + // call will make the is_finished() true. + $this->last_next_result = $this->next_entity(); + } + + public function key(): string { + return $this->get_reentrancy_cursor(); + } + + public function valid(): bool { + return false !== $this->last_next_result && ! $this->is_finished() && ! $this->get_last_error(); + } + + public function rewind(): void { + // Haven't started yet. + if ( null === $this->last_next_result ) { + return; + } + _doing_it_wrong( + __METHOD__, + 'WP_WXR_Entity_Reader does not support rewinding.', + null + ); + } +} diff --git a/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php new file mode 100644 index 0000000000..b01bd0c875 --- /dev/null +++ b/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php @@ -0,0 +1,95 @@ +html = $html; + $this->post_id = $post_id; + } + + public function next_entity() { + // If we're finished, we're finished. + if ( $this->finished ) { + return false; + } + + // If we've already read some entities, skip to the next one. 
+ if ( null !== $this->entities ) { + if ( count( $this->entities ) <= 1 ) { + $this->finished = true; + return false; + } + array_shift( $this->entities ); + return true; + } + + // We did not read any entities yet. Let's convert the HTML document into entities. + $converter = new WP_HTML_To_Blocks( $this->html ); + if ( false === $converter->convert() ) { + return false; + } + + $all_metadata = $converter->get_all_metadata(); + $post_fields = array(); + $other_metadata = array(); + foreach ( $all_metadata as $key => $values ) { + if ( in_array( $key, WP_Imported_Entity::POST_FIELDS, true ) ) { + $post_fields[ $key ] = $values[0]; + } else { + $other_metadata[ $key ] = $values[0]; + } + } + + // Yield the post entity. + $this->entities[] = new WP_Imported_Entity( + 'post', + array_merge( + $post_fields, + array( + 'post_id' => $this->post_id, + 'content' => $converter->get_block_markup(), + ) + ) + ); + + // Yield all the metadata that don't belong to the post entity. + foreach ( $other_metadata as $key => $value ) { + $this->entities[] = new WP_Imported_Entity( + 'post_meta', + array( + 'post_id' => $this->post_id, + 'meta_key' => $key, + 'meta_value' => $value, + ) + ); + } + return true; + } + + public function get_entity() { + if ( $this->is_finished() ) { + return false; + } + return $this->entities[0]; + } + + public function is_finished(): bool { + return $this->finished; + } + + public function get_last_error(): ?string { + return null; + } +} diff --git a/packages/playground/data-liberation/src/import/WP_Import_Utils.php b/packages/playground/data-liberation/src/import/WP_Import_Utils.php new file mode 100644 index 0000000000..91761e4223 --- /dev/null +++ b/packages/playground/data-liberation/src/import/WP_Import_Utils.php @@ -0,0 +1,96 @@ +"; + } + + public static function block_closer( $block_name ) { + return ""; + } + + /** + * Convert an array of WP_Block_Object objects to HTML markup. + * + * @param array $blocks The blocks to convert to markup. 
+ * @return string The HTML markup. + */ + public static function convert_blocks_to_markup( $blocks ) { + $block_markup = ''; + + foreach ( $blocks as $block ) { + // Allow mixing of inner blocks and content strings. + if ( is_string( $block ) ) { + $block_markup .= $block; + continue; + } + // Start of block comment + $block_markup .= self::block_opener( $block->block_name, $block->attrs ); + $block_markup .= $block->attrs['content'] ?? ''; + $block_markup .= self::convert_blocks_to_markup( $block->inner_blocks ); + $block_markup .= self::block_closer( $block->block_name ); + } + + return $block_markup; + } + + public static function slug_to_title( $filename ) { + $name = pathinfo( $filename, PATHINFO_FILENAME ); + $name = preg_replace( '/^\d+/', '', $name ); + $name = str_replace( + array( '-', '_' ), + ' ', + $name + ); + $name = ucwords( $name ); + return $name; + } + + public static function remove_first_h1_block_from_block_markup( $html ) { + $p = WP_Import_HTML_Processor::create_fragment( $html ); + if ( false === $p->next_tag() ) { + return false; + } + if ( $p->get_tag() !== 'H1' ) { + return false; + } + $depth = $p->get_current_depth(); + $title = ''; + do { + if ( false === $p->next_token() ) { + break; + } + if ( $p->get_token_type() === '#text' ) { + $title .= $p->get_modifiable_text() . ' '; + } + } while ( $p->get_current_depth() > $depth ); + + if ( ! 
$title ) { + return false; + } + + // Move past the closing comment + $p->next_token(); + if ( $p->get_token_type() === '#text' ) { + $p->next_token(); + } + if ( $p->get_token_type() !== '#comment' ) { + return false; + } + + return array( + 'content' => trim( $title ), + 'remaining_html' => substr( + $html, + $p->get_string_index_after_current_token() + ), + ); + } +} diff --git a/packages/playground/data-liberation/src/wordpress-core-html-api/class-wp-html-processor.php b/packages/playground/data-liberation/src/wordpress-core-html-api/class-wp-html-processor.php index 14cb296d43..c2109168b4 100644 --- a/packages/playground/data-liberation/src/wordpress-core-html-api/class-wp-html-processor.php +++ b/packages/playground/data-liberation/src/wordpress-core-html-api/class-wp-html-processor.php @@ -1613,7 +1613,10 @@ private function step_in_head(): bool { */ $charset = $this->get_attribute( 'charset' ); if ( is_string( $charset ) && 'tentative' === $this->state->encoding_confidence ) { - $this->bail( 'Cannot yet process META tags with charset to determine encoding.' ); + // Commenting this out for now. We're assuming UTF-8 in WP_HTML_To_Blocks and + // we don't want to fail just because a document contained a meta tag with a UTF-8 charset. + // @TODO: Bail on non-utf8 charsets. + // $this->bail( 'Cannot yet process META tags with charset to determine encoding.' ); } /* @@ -1632,7 +1635,10 @@ private function step_in_head(): bool { 0 === strcasecmp( $http_equiv, 'Content-Type' ) && 'tentative' === $this->state->encoding_confidence ) { - $this->bail( 'Cannot yet process META tags with http-equiv Content-Type to determine encoding.' ); + // Commenting this out for now. We're assuming UTF-8 in WP_HTML_To_Blocks and + // we don't want to fail just because a document contained a meta tag with a UTF-8 charset. + // @TODO: Bail on non-utf8 charsets. + // $this->bail( 'Cannot yet process META tags with http-equiv Content-Type to determine encoding.' 
); } return true; diff --git a/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php b/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php new file mode 100644 index 0000000000..be233599fa --- /dev/null +++ b/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php @@ -0,0 +1,75 @@ + + + + +

It is our pleasure to announce that WordPress 6.8 was released

+

Last week, WordPress 6.8 was released.

+HTML; + $reader = new WP_HTML_Entity_Reader( $html, 1 ); + $entities = []; + while ( $reader->next_entity() ) { + $data = $reader->get_entity()->get_data(); + if(isset($data['content'])) { + $data['content'] = $this->normalize_markup( $data['content'] ); + } + $entities[] = [ + 'type' => $reader->get_entity()->get_type(), + 'data' => $data, + ]; + } + $expected_entities = [ + [ + 'type' => 'post', + 'data' => [ + 'post_title' => 'WordPress 6.8 was released', + 'post_date' => '2024-12-16', + 'post_id' => 1, + 'content' => $this->normalize_markup(<< +

It is our pleasure to announce that WordPress 6.8 was released

+ + + +

Last week, WordPress 6.8 was released.

+ +HTML) + ] + ], + [ + 'type' => 'post_meta', + 'data' => [ + 'post_id' => 1, + 'meta_key' => 'custom_post_meta', + 'meta_value' => 'custom_post_meta_value', + ] + ], + [ + 'type' => 'post_meta', + 'data' => [ + 'post_id' => 1, + 'meta_key' => 'color_palette', + 'meta_value' => 'use_that_pretty_one', + ] + ], + ]; + $this->assertEquals( $expected_entities, $entities ); + } + + private function normalize_markup( $markup ) { + $processor = new WP_HTML_Processor( $markup ); + $serialized = $processor->serialize(); + if(str_ends_with($serialized, "")) { + $serialized = substr($serialized, 0, strlen("")); + } + return $serialized; + } + +} diff --git a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php new file mode 100644 index 0000000000..fc78ecc98a --- /dev/null +++ b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php @@ -0,0 +1,141 @@ + + + + + + + +

WordPress 6.8 was released

+

Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library.

+HTML; + $converter = new WP_HTML_To_Blocks( $html ); + $converter->convert( $html ); + $metadata = $converter->get_all_metadata(); + $expected_metadata = [ + 'post_title' => ['WordPress 6.8 was released'], + 'post_date' => ['2024-12-16'], + 'post_modified' => ['2024-12-16'], + 'post_author' => ['1'], + 'post_author_name' => ['The WordPress Team'], + 'post_author_url' => ['https://wordpress.org'], + 'post_author_avatar' => ['https://wordpress.org/wp-content/uploads/2024/04/wordpress-logo-2024.png'], + ]; + $this->assertEquals( $expected_metadata, $metadata ); + } + + /** + * @dataProvider provider_test_conversion + */ + public function test_html_to_blocks_conversion( $html, $expected ) { + $converter = new WP_HTML_To_Blocks( $html ); + $converter->convert( $html ); + $blocks = $converter->get_block_markup(); + + $this->assertEquals( $this->normalize_markup($expected), $this->normalize_markup($blocks) ); + } + + private function normalize_markup( $markup ) { + $processor = new WP_HTML_Processor( $markup ); + $serialized = $processor->serialize(); + $serialized = trim( + str_replace( + [ + // Naively remove parts of the HTML that serialize() + // adds that we don't want. + '', + '', + // Even more naively, remove all the newlines. + "\n" + ], + '', + $serialized + ) + ); + return $serialized; + } + + public function provider_test_conversion() { + return [ + 'A simple paragraph' => [ + 'html' => '

A simple paragraph

', + 'expected' => "

A simple paragraph

" + ], + 'A simple list' => [ + 'html' => '
  • Item 1
  • Item 2
', + 'expected' => <<
    \n
  • Item 1
  • Item 2
+HTML + ], + 'A non-normative list' => [ + 'html' => '
). + * + * This method exists to provide a consistent interface with WP_HTML_Processor. + * + * @return bool Whether the tag is expected to be closed. + */ + public function expects_closer() { + if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { + return false; + } + + return ! $this->is_empty_element() && ! $this->is_closing_tag; + } + /** * Indicates if the currently matched tag is an empty element tag. * @@ -2604,6 +2673,9 @@ public function get_token_name() { case self::STATE_CDATA_NODE: return '#cdata-section'; + case self::STATE_DOCTYPE_NODE: + return '#doctype'; + case self::STATE_XML_DECLARATION: return '#xml-declaration'; @@ -3030,10 +3102,11 @@ private function step_in_prolog( $node_to_process = self::PROCESS_NEXT_NODE ) { $this->last_error = self::ERROR_SYNTAX; _doing_it_wrong( __METHOD__, 'Unexpected token type in prolog stage.', 'WP_VERSION' ); } - return $this->step(); - case '#xml-declaration': + // @TODO: Fail if there's more than one or if was found before the XML declaration token. + case '#doctype': case '#comment': + case '#xml-declaration': case '#processing-instructions': return true; case '#tag': @@ -3393,6 +3466,18 @@ private function mark_incomplete_input( */ const STATE_CDATA_NODE = 'STATE_CDATA_NODE'; + /** + * Parser DOCTYPE Node State. + * + * Indicates that the parser has found a DOCTYPE declaration and it's possible + * to read and modify its modifiable text. + * + * @since WP_VERSION + * + * @access private + */ + const STATE_DOCTYPE_NODE = 'STATE_DOCTYPE_NODE'; + /** * Indicates that the parser has found an XML processing instruction. 
* diff --git a/packages/playground/data-liberation/tests/WPEPubEntityReaderTests.php b/packages/playground/data-liberation/tests/WPEPubEntityReaderTests.php index 4a6c7a2324..f6a4c205f4 100644 --- a/packages/playground/data-liberation/tests/WPEPubEntityReaderTests.php +++ b/packages/playground/data-liberation/tests/WPEPubEntityReaderTests.php @@ -21,6 +21,7 @@ public function test_entity_reader( $reader ) { 'data' => $data, ]; } + $this->assertNull( $reader->get_last_error() ); $this->assertEquals( 3, count($entities) ); $this->assertEquals( 117, strlen($entities[0]['data']['content']) ); $this->assertGreaterThan( 1000, strlen($entities[1]['data']['content']) ); diff --git a/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php b/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php index be233599fa..f8d65c0357 100644 --- a/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php +++ b/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php @@ -13,7 +13,7 @@ public function test_entity_reader() {

It is our pleasure to announce that WordPress 6.8 was released

Last week, WordPress 6.8 was released.

HTML; - $reader = new WP_HTML_Entity_Reader( $html, 1 ); + $reader = new WP_HTML_Entity_Reader( new WP_HTML_Processor( $html ), 1 ); $entities = []; while ( $reader->next_entity() ) { $data = $reader->get_entity()->get_data(); diff --git a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php index 66cae9670e..66a1a64306 100644 --- a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php +++ b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php @@ -16,7 +16,7 @@ public function test_metadata_extraction() {

WordPress 6.8 was released

Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library.

HTML; - $converter = new WP_HTML_To_Blocks( $html ); + $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $html ) ); $converter->convert( $html ); $metadata = $converter->get_all_metadata(); $expected_metadata = [ @@ -35,7 +35,7 @@ public function test_metadata_extraction() { * @dataProvider provider_test_conversion */ public function test_html_to_blocks_conversion( $html, $expected ) { - $converter = new WP_HTML_To_Blocks( $html ); + $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $html ) ); $converter->convert( $html ); $blocks = $converter->get_block_markup(); @@ -136,7 +136,7 @@ public function provider_test_conversion() { public function test_html_to_blocks_excerpt() { $input = file_get_contents( __DIR__ . '/fixtures/html-to-blocks/excerpt.input.html' ); - $converter = new WP_HTML_To_Blocks( $input ); + $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $input ) ); $converter->convert( $input ); $blocks = $converter->get_block_markup(); diff --git a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php index 2c3646dada..0e1dbf1ec4 100644 --- a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php +++ b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php @@ -1749,4 +1749,46 @@ public function test_pause_and_resume() { $this->assertEquals( 'Hello there', $resumed->get_modifiable_text(), 'Did not find the expected text.' 
); } -} \ No newline at end of file + /** + * @ticket 61365 + * + * @covers WP_XML_Processor::next_token + */ + public function test_doctype_parsing() { + $processor = WP_XML_Processor::create_from_string( + 'Content' + ); + + $this->assertTrue( $processor->next_token(), 'Did not find DOCTYPE node' ); + $this->assertEquals( '#doctype', $processor->get_token_type(), 'Did not find DOCTYPE node' ); + $this->assertTrue( $processor->next_token(), 'Did not find root tag' ); + $this->assertEquals( 'root', $processor->get_tag(), 'Did not find root tag' ); + } + + /** + * @ticket 61365 + * + * @covers WP_XML_Processor::next_token + */ + public function test_unsupported_doctype_parsing() { + $processor = WP_XML_Processor::create_from_string( + 'Content' + ); + + $this->assertFalse( $processor->next_token(), 'Did not reject complex DOCTYPE' ); + $this->assertEquals( 'syntax', $processor->get_last_error(), 'Did not set syntax error' ); + } + + public function test_doctype_in_tag_content_is_syntax_error() { + $processor = WP_XML_Processor::create_from_string( + 'Content' + ); + + $processor->next_token(); + $processor->next_token(); + + $this->assertFalse( $processor->next_token(), 'Did not reject DOCTYPE in tag content' ); + $this->assertEquals( 'syntax', $processor->get_last_error(), 'Did not set syntax error' ); + } + +} From 58def6c915eb50c7e45df21674e530156767e250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Tue, 17 Dec 2024 16:02:56 +0100 Subject: [PATCH 11/26] Parse EPubs as XHTML --- .../src/block-markup/WP_HTML_To_Blocks.php | 8 +++---- .../entity-readers/WP_EPub_Entity_Reader.php | 4 ++-- .../src/xml-api/WP_XML_Processor.php | 7 +++--- .../tests/WPEPubEntityReaderTests.php | 22 ++--------------- .../tests/WPHTMLToBlocksTests.php | 24 ++++++++++++++++++- 5 files changed, 33 insertions(+), 32 deletions(-) diff --git a/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php 
b/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php index d4f2118e85..0d36c5629e 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php +++ b/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php @@ -45,7 +45,6 @@ public function convert() { } while ( $this->markup_processor->next_token() ) { - var_dump( $this->markup_processor->get_token_type() ); switch ( $this->markup_processor->get_token_type() ) { case '#text': if ( $this->ignore_text ) { @@ -58,7 +57,6 @@ public function convert() { break; } } - var_dump( $this->markup_processor->get_last_error() ); if ( $this->markup_processor->get_last_error() ) { $this->last_error = $this->markup_processor->get_last_error(); @@ -90,8 +88,8 @@ private function handle_tag() { $tag = strtoupper( $html->get_tag() ); $tag_lowercase = strtolower( $tag ); - $is_tag_opener = ! $html->is_tag_closer(); - if ( ! $html->expects_closer() ) { + $is_void_tag = ! $html->expects_closer() && ! $html->is_tag_closer(); + if ( $is_void_tag ) { switch ( $tag ) { case 'META': $key = $html->get_attribute( 'name' ); @@ -119,7 +117,7 @@ private function handle_tag() { // Just insert an HTML block or what? break; } - } elseif ( $is_tag_opener ) { + } elseif ( ! 
$html->is_tag_closer() ) { switch ( $tag ) { // Block elements case 'SCRIPT': diff --git a/packages/playground/data-liberation/src/entity-readers/WP_EPub_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_EPub_Entity_Reader.php index e3a6c2a06b..db7b8b9df3 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_EPub_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_EPub_Entity_Reader.php @@ -93,8 +93,8 @@ public function next_entity() { return false; } - $html_file = array_shift( $this->remaining_html_files ); - $html = $this->zip->read_file( $html_file ); + $html_file = array_shift( $this->remaining_html_files ); + $html = $this->zip->read_file( $html_file ); $this->current_html_reader = new WP_HTML_Entity_Reader( WP_XML_Processor::create_from_string( $html ), $this->current_post_id diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php index 22e3039818..b6b2a7669e 100644 --- a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php +++ b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php @@ -1628,13 +1628,13 @@ private function parse_next_tag() { return false; } - $closer_at = $at; + $closer_at = $at; $this->parser_state = self::STATE_DOCTYPE_NODE; $this->token_length = $closer_at + 1 - $this->token_starts_at; $this->bytes_already_parsed = $closer_at + 1; return true; } - + /* * Anything else here is either unsupported at this point or invalid * syntax. See the class-level @TODO annotations for more information. @@ -1644,7 +1644,6 @@ private function parse_next_tag() { return false; } - /* * An `is_empty_element() && ! $this->is_closing_tag; + return $this->is_tag_opener() && ! 
$this->is_empty_element(); } /** diff --git a/packages/playground/data-liberation/tests/WPEPubEntityReaderTests.php b/packages/playground/data-liberation/tests/WPEPubEntityReaderTests.php index f6a4c205f4..c6bf17248c 100644 --- a/packages/playground/data-liberation/tests/WPEPubEntityReaderTests.php +++ b/packages/playground/data-liberation/tests/WPEPubEntityReaderTests.php @@ -13,9 +13,6 @@ public function test_entity_reader( $reader ) { $entities = []; while ( $reader->next_entity() ) { $data = $reader->get_entity()->get_data(); - if(isset($data['content'])) { - $data['content'] = $this->normalize_markup( $data['content'] ); - } $entities[] = [ 'type' => $reader->get_entity()->get_type(), 'data' => $data, @@ -23,9 +20,10 @@ public function test_entity_reader( $reader ) { } $this->assertNull( $reader->get_last_error() ); $this->assertEquals( 3, count($entities) ); - $this->assertEquals( 117, strlen($entities[0]['data']['content']) ); + $this->assertGreaterThan( 100, strlen($entities[0]['data']['content']) ); $this->assertGreaterThan( 1000, strlen($entities[1]['data']['content']) ); $this->assertGreaterThan( 1000, strlen($entities[2]['data']['content']) ); + echo $entities[2]['data']['content']; } public function epub_byte_reader_data_provider() { @@ -39,20 +37,4 @@ public function epub_byte_reader_data_provider() { ]; } - private function normalize_markup( $markup ) { - $processor = new WP_HTML_Processor( $markup ); - $serialized = $processor->serialize(); - // Naively remove parts of the HTML that serialize() - // adds that we don't want. 
- $serialized = str_replace( - [ - '', - '', - ], - '', - $serialized - ); - return $serialized; - } - } diff --git a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php index 66a1a64306..91359b9e47 100644 --- a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php +++ b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php @@ -146,7 +146,29 @@ public function test_html_to_blocks_excerpt() { } $this->assertEquals( file_get_contents( $output_file ), $blocks ); - + } + + public function test_xhtml_to_blocks_conversion() { + $input = << + + + +

Hello, world!

+

And some content

+ + +XML; + $converter = new WP_HTML_To_Blocks( WP_XML_Processor::create_from_string( $input ) ); + $converter->convert( $input ); + $blocks = $converter->get_block_markup(); + $expected = <<

Hello, world!

And some content

+HTML; + $this->assertEquals( + $this->normalize_markup( $expected ), + $this->normalize_markup( $blocks ) + ); } } From 608b5e8f4f9af54f267184754d64a997d8555994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Tue, 17 Dec 2024 19:49:04 +0100 Subject: [PATCH 12/26] [Data Liberation] Import static files from a local static-pages directory --- .../playground/data-liberation/plugin.php | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/packages/playground/data-liberation/plugin.php b/packages/playground/data-liberation/plugin.php index f17704ebcc..5cffb57d96 100644 --- a/packages/playground/data-liberation/plugin.php +++ b/packages/playground/data-liberation/plugin.php @@ -39,6 +39,46 @@ function () { } ); +/** + * Import static pages from a disk, if one exists. + */ +function import_static_pages() { + $static_root = WP_CONTENT_DIR . '/uploads/static-pages'; + if ( ! is_dir( $static_root ) ) { + return; + } + $reader = WP_Directory_Tree_Entity_Reader::create( + new WP_Filesystem(), + array ( + 'root_dir' => $static_root, + 'first_post_id' => 1, + 'allowed_extensions' => array( 'md' ), + 'index_file_patterns' => array( '#^index\.md$#' ), + 'markup_converter_factory' => function( $content ) { + return new WP_Markdown_To_Blocks( $content ); + }, + ) + ); + + $importer = WP_Stream_Importer::create( + function () use ( $reader ) { + return $reader; + }, + array(), + null + ); + + $import_session = WP_Import_Session::create( + array( + 'data_source' => 'static_pages', + 'importer' => $importer, + ) + ); + + data_liberation_import_step( $import_session, $importer ); +} +register_activation_hook( __FILE__, 'import_static_pages' ); + add_action( 'init', function () { @@ -346,9 +386,11 @@ function data_liberation_process_import() { } add_action( 'data_liberation_process_import', 'data_liberation_process_import' ); -function data_liberation_import_step( $session ) { +function data_liberation_import_step( $session, 
$importer = null ) { $metadata = $session->get_metadata(); - $importer = data_liberation_create_importer( $metadata ); + if ( ! $importer ) { + $importer = data_liberation_create_importer( $metadata ); + } if ( ! $importer ) { return; } From e44d2c23e402ea5d275cd0ffacb7db49ab6f7c21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Wed, 18 Dec 2024 03:12:54 +0100 Subject: [PATCH 13/26] Experimental static page editing workflow and blocks -> markdown converter --- .../src/WP_Blocks_To_Markdown.php | 182 +++++++++++++ .../src/bootstrap.php | 1 + .../data-liberation/bin/rewrite-urls.php | 4 +- .../playground/data-liberation/plugin.php | 40 --- .../WP_Block_Markup_Processor.php | 247 ++++++++++++------ .../WP_Block_Markup_Url_Processor.php | 9 +- .../WP_Directory_Tree_Entity_Reader.php | 142 +++++----- .../data-liberation/src/functions.php | 2 +- .../src/import/WP_Import_Utils.php | 2 +- .../src/import/WP_Stream_Importer.php | 6 +- .../tests/WPBlockMarkupUrlProcessorTests.php | 16 +- 11 files changed, 453 insertions(+), 198 deletions(-) create mode 100644 packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php diff --git a/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php b/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php new file mode 100644 index 0000000000..a70124874e --- /dev/null +++ b/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php @@ -0,0 +1,182 @@ +blocks = WP_Block_Markup_Processor::create_fragment($block_markup); + $this->metadata = $metadata; + $this->context_breadcrumbs = $context_breadcrumbs; + } + + public function convert() { + $this->blocks_to_markdown(); + return true; + } + + public function get_markdown() { + return $this->markdown; + } + + private function blocks_to_markdown() { + if($this->metadata) { + $this->markdown .= "---\n"; + foreach($this->metadata as $key => $value) { + // @TODO: Apply correct YAML value escaping + $value = 
json_encode($value); + $this->markdown .= "$key: $value\n"; + } + $this->markdown .= "---\n\n"; + } + + while($this->blocks->next_token()) { + switch($this->blocks->get_token_type()) { + case '#block-comment': + $this->handle_block_comment(); + break; + case '#tag': + $this->handle_tag(); + break; + case '#text': + $this->markdown .= ltrim(preg_replace('/ +/', ' ', $this->blocks->get_modifiable_text())); + break; + } + } + } + + private function handle_block_comment() { + if ( $this->blocks->is_block_closer() ) { + return; + } + switch($this->blocks->get_block_name()) { + case 'wp:quote': + $markdown = $this->skip_and_convert_inner_html(); + $lines = explode("\n", $markdown); + foreach($lines as $line) { + $this->markdown .= "> $line\n"; + } + $this->markdown .= ">\n"; + break; + case 'wp:list': + $markdown = $this->skip_and_convert_inner_html(); + $lines = explode("\n", $markdown); + foreach($lines as $line) { + if($line) { + $this->markdown .= "* $line\n"; + } + } + break; + case 'wp:list-item': + $this->markdown .= $this->skip_and_convert_inner_html() . "\n"; + break; + case 'wp:code': + $code = $this->skip_and_convert_inner_html(); + $language = $this->blocks->get_block_attribute('language') ?? ''; + $fence = str_repeat('`', max(3, $this->longest_sequence_of($code, '`') + 1)); + $this->markdown .= "$fence$language\n$code\n$fence\n\n"; + break; + case 'wp:image': + $alt = $this->blocks->get_block_attribute('alt') ?? ''; + $url = $this->blocks->get_block_attribute('url'); + $this->markdown .= "![$alt]($url)\n\n"; + break; + case 'wp:heading': + $level = $this->blocks->get_block_attribute('level') ?? 1; + $content = $this->skip_and_convert_inner_html(); + $this->markdown .= str_repeat('#', $level) . ' ' . $content . "\n\n"; + break; + case 'wp:paragraph': + $this->markdown .= $this->skip_and_convert_inner_html() . 
"\n\n"; + break; + case 'wp:separator': + $this->markdown .= "\n---\n\n"; + break; + default: + $code = ''; + $code .= ''; + $code .= $this->skip_and_convert_inner_html(); + $code .= ''; + $language = 'block'; + $fence = str_repeat('`', max(3, $this->longest_sequence_of($code, '`') + 1)); + $this->markdown .= "$fence$language\n$code\n$fence\n\n"; + break; + } + } + + private function handle_tag() { + $prefix = $this->blocks->is_tag_closer() ? '-' : '+'; + $event = $prefix . $this->blocks->get_tag(); + switch($event) { + case '+B': + case '-B': + case '+STRONG': + case '-STRONG': + $this->markdown .= '**'; + break; + case '+I': + case '-I': + case '+EM': + case '-EM': + $this->markdown .= '*'; + break; + case '+U': + case '-U': + $this->markdown .= '_'; + break; + case '+CODE': + case '-CODE': + if(!in_array('wp:code', $this->get_block_breadcrumbs(), true)) { + $this->markdown .= '`'; + } + break; + case '+A': + $href = $this->blocks->get_attribute('href'); + $this->markdown .= '['; + break; + case '-A': + $href = $this->blocks->get_attribute('href'); + $this->markdown .= "]($href)"; + break; + case '+BR': + $this->markdown .= "\n"; + break; + case '+IMG': + $alt = $this->blocks->get_attribute('alt') ?? 
''; + $url = $this->blocks->get_attribute('src'); + $this->markdown .= "![$alt]($url)\n\n"; + break; + } + } + + private function skip_and_convert_inner_html() { + $html = $this->blocks->skip_and_get_block_inner_html(); + $converter = new WP_Blocks_To_Markdown($html, [], $this->get_block_breadcrumbs()); + $converter->convert(); + return $converter->get_markdown(); + } + + private function longest_sequence_of($input, $substring) { + $at = 0; + $sequence_length = 0; + while($at < strlen($input)) { + $at += strcspn($input, $substring, $at); + $current_sequence_length = strspn($input, $substring, $at); + if($current_sequence_length > $sequence_length) { + $sequence_length = $current_sequence_length; + } + $at += $current_sequence_length; + } + return $sequence_length; + } + + private function get_block_breadcrumbs() { + return array_merge($this->context_breadcrumbs, $this->blocks->get_block_breadcrumbs()); + } + +} diff --git a/packages/playground/data-liberation-markdown/src/bootstrap.php b/packages/playground/data-liberation-markdown/src/bootstrap.php index 2d5b20fd37..d12e418bce 100644 --- a/packages/playground/data-liberation-markdown/src/bootstrap.php +++ b/packages/playground/data-liberation-markdown/src/bootstrap.php @@ -2,5 +2,6 @@ require_once __DIR__ . '/WP_Markdown_Importer.php'; require_once __DIR__ . '/WP_Markdown_To_Blocks.php'; +require_once __DIR__ . '/WP_Blocks_To_Markdown.php'; require_once __DIR__ . '/../vendor/autoload.php'; diff --git a/packages/playground/data-liberation/bin/rewrite-urls.php b/packages/playground/data-liberation/bin/rewrite-urls.php index c457b7a24d..da27b7caa1 100644 --- a/packages/playground/data-liberation/bin/rewrite-urls.php +++ b/packages/playground/data-liberation/bin/rewrite-urls.php @@ -35,7 +35,7 @@ // @TODO: Decide – should the current site URL be always required to // populate $base_url? $base_url = $options['from-url'] ?? 
'https://playground.internal'; -$p = new WP_Block_Markup_Url_Processor( $block_markup, $base_url ); +$p = WP_Block_Markup_Url_Processor::create_from_html( $block_markup, $base_url ); switch ( $command ) { case 'list_urls': @@ -76,7 +76,7 @@ function wp_list_urls_in_block_markup( $options ) { $block_markup = $options['block_markup']; $base_url = $options['base_url'] ?? 'https://playground.internal'; - $p = new WP_Block_Markup_Url_Processor( $block_markup, $base_url ); + $p = WP_Block_Markup_Url_Processor::create_from_html( $block_markup, $base_url ); while ( $p->next_url() ) { // Skip empty relative URLs. if ( ! trim( $p->get_raw_url() ) ) { diff --git a/packages/playground/data-liberation/plugin.php b/packages/playground/data-liberation/plugin.php index 5cffb57d96..8c319005c3 100644 --- a/packages/playground/data-liberation/plugin.php +++ b/packages/playground/data-liberation/plugin.php @@ -39,46 +39,6 @@ function () { } ); -/** - * Import static pages from a disk, if one exists. - */ -function import_static_pages() { - $static_root = WP_CONTENT_DIR . '/uploads/static-pages'; - if ( ! 
is_dir( $static_root ) ) { - return; - } - $reader = WP_Directory_Tree_Entity_Reader::create( - new WP_Filesystem(), - array ( - 'root_dir' => $static_root, - 'first_post_id' => 1, - 'allowed_extensions' => array( 'md' ), - 'index_file_patterns' => array( '#^index\.md$#' ), - 'markup_converter_factory' => function( $content ) { - return new WP_Markdown_To_Blocks( $content ); - }, - ) - ); - - $importer = WP_Stream_Importer::create( - function () use ( $reader ) { - return $reader; - }, - array(), - null - ); - - $import_session = WP_Import_Session::create( - array( - 'data_source' => 'static_pages', - 'importer' => $importer, - ) - ); - - data_liberation_import_step( $import_session, $importer ); -} -register_activation_hook( __FILE__, 'import_static_pages' ); - add_action( 'init', function () { diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php index 101cc63484..9eab194b3b 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php @@ -11,12 +11,14 @@ * If the post cannot fit into memory, WordPress won't be able to render it * anyway. */ -class WP_Block_Markup_Processor extends WP_HTML_Tag_Processor { +class WP_Block_Markup_Processor extends WP_HTML_Processor { private $block_name; protected $block_attributes; private $block_attributes_updated; private $block_closer; + private $stack_of_open_blocks = array(); + private $last_block_error; /** * @var \RecursiveIteratorIterator @@ -37,6 +39,60 @@ public function get_token_type(): ?string { } } + public function get_last_error(): ?string { + return $this->last_block_error ?? 
parent::get_last_error(); + } + + public function skip_and_get_block_inner_html() { + if('#block-comment' !== $this->get_token_type()) { + return false; + } + + if($this->is_block_closer()) { + return false; + } + + if(false === WP_HTML_Tag_Processor::set_bookmark('block-start')) { + return false; + } + + $starting_block_depth = $this->get_block_depth(); + while($this->next_token()) { + if( + $this->get_token_type() === '#block-comment' && + $this->is_block_closer() && + $this->get_block_depth() === $starting_block_depth - 1 + ) { + break; + } + } + + if(false === WP_HTML_Tag_Processor::set_bookmark('block-end')) { + WP_HTML_Tag_Processor::release_bookmark('block-start'); + return false; + } + + $inner_html_start = $this->bookmarks['block-start']->start + $this->bookmarks['block-start']->length; + $inner_html_end = $this->bookmarks['block-end']->start - $inner_html_start; + + WP_HTML_Tag_Processor::release_bookmark('block-start'); + WP_HTML_Tag_Processor::release_bookmark('block-end'); + + return substr( + $this->html, + $inner_html_start, + $inner_html_end + ); + } + + public function get_block_depth() { + return count($this->stack_of_open_blocks); + } + + public function get_block_breadcrumbs() { + return $this->stack_of_open_blocks; + } + /** * Returns the name of the block if the current token is a block comment. * @@ -58,101 +114,142 @@ public function get_block_attributes() { return $this->block_attributes; } + public function get_block_attribute($attribute_name) { + if ( null === $this->block_attributes ) { + return false; + } + + return $this->block_attributes[$attribute_name] ?? 
false; + } + public function is_block_closer() { return $this->block_name !== null && $this->block_closer === true; } + private $in_next_token = false; public function next_token(): bool { - $this->get_updated_html(); - - $this->block_name = null; - $this->block_attributes = null; - $this->block_closer = false; - $this->block_attributes_updated = false; - - if ( parent::next_token() === false ) { - return false; + // Prevent running next_token() logic twice when the parent method + // makes recursive calls to itself. + if($this->in_next_token) { + return parent::next_token(); } + $this->in_next_token = true; + try { + $this->get_updated_html(); + + $this->block_name = null; + $this->block_attributes = null; + $this->block_closer = false; + $this->block_attributes_updated = false; + + while ( true ) { + if ( parent::next_token() === false ) { + return false; + } - if ( parent::get_token_type() !== '#comment' ) { - return true; - } + if ( + $this->get_token_type() === '#tag' && ( + $this->get_tag() === 'HTML' || + $this->get_tag() === 'HEAD' || + $this->get_tag() === 'BODY' + ) + ) { + continue; + } - $text = parent::get_modifiable_text(); - /** - * Try to parse as a block. The block parser won't cut it because - * while it can parse blocks, it has no semantics for rewriting the - * block markup. Let's do our best here: - */ - $at = strspn( $text, ' \t\f\r\n' ); // Whitespace. + break; + } - if ( $at >= strlen( $text ) ) { - // This is an empty comment. Not a block. - return true; - } + if ( parent::get_token_type() !== '#comment' ) { + return true; + } - // Blocks closers start with the solidus character (`/`). - if ( '/' === $text[ $at ] ) { - $this->block_closer = true; - ++$at; - } + $text = parent::get_modifiable_text(); + /** + * Try to parse as a block. The block parser won't cut it because + * while it can parse blocks, it has no semantics for rewriting the + * block markup. Let's do our best here: + */ + $at = strspn( $text, ' \t\f\r\n' ); // Whitespace. 
+ + if ( $at >= strlen( $text ) ) { + // This is an empty comment. Not a block. + return true; + } - // Blocks start with wp. - if ( ! ( - $at + 3 < strlen( $text ) && - $text[ $at ] === 'w' && - $text[ $at + 1 ] === 'p' && - $text[ $at + 2 ] === ':' - ) ) { - return true; - } + // Blocks closers start with the solidus character (`/`). + if ( '/' === $text[ $at ] ) { + $this->block_closer = true; + ++$at; + } - $name_starts_at = $at; + // Blocks start with wp. + if ( ! ( + $at + 3 < strlen( $text ) && + $text[ $at ] === 'w' && + $text[ $at + 1 ] === 'p' && + $text[ $at + 2 ] === ':' + ) ) { + return true; + } - // Skip wp. - $at += 3; + $name_starts_at = $at; - // Parse the actual block name after wp. - $name_length = strspn( $text, 'abcdefghijklmnopqrstuwxvyzABCDEFGHIJKLMNOPRQSTUWXVYZ0123456789_-', $at ); - if ( $name_length === 0 ) { - // This wasn't a block after all, just a regular comment. - return true; - } - $name = substr( $text, $name_starts_at, $name_length + 3 ); - $at += $name_length; + // Skip wp. + $at += 3; - // Skip the whitespace that follows the block name. - $at += strspn( $text, ' \t\f\r\n', $at ); - if ( $at >= strlen( $text ) ) { - // It's a block without attributes. - $this->block_name = $name; + // Parse the actual block name after wp. + $name_length = strspn( $text, 'abcdefghijklmnopqrstuwxvyzABCDEFGHIJKLMNOPRQSTUWXVYZ0123456789_-', $at ); + if ( $name_length === 0 ) { + // This wasn't a block after all, just a regular comment. + return true; + } + $name = substr( $text, $name_starts_at, $name_length + 3 ); + $at += $name_length; - return true; - } + // Skip the whitespace that follows the block name. + $at += strspn( $text, ' \t\f\r\n', $at ); + if ( $at < strlen( $text ) ) { + // It may be a block with attributes... - // It seems we may have block attributes here. + // ...but block closers cannot have attributes. + if ( $this->block_closer ) { + return true; + } - // Block closers cannot have attributes. 
- if ( $this->block_closer ) { - return true; - } + // Let's try to parse attributes as JSON. + $json_maybe = substr( $text, $at ); + $attributes = json_decode( $json_maybe, true ); + if ( null === $attributes || ! is_array( $attributes ) ) { + // This comment looked like a block comment, but the attributes didn't + // parse as a JSON array. This means it wasn't a block after all. + return true; + } + } else { + // This is a block without attributes. + $attributes = array(); + } + + // We have a block name and a valid attributes array. We may not find a block + // closer, but let's assume is a block and process it as such. + // @TODO: Confirm that WordPress block parser would have parsed this as a block. + $this->block_name = $name; + $this->block_attributes = $attributes; + + if($this->block_closer) { + $popped = array_pop($this->stack_of_open_blocks); + if($popped !== $name) { + $this->last_block_error = sprintf('Block closer %s does not match the last opened block %s.', $name, $popped); + return false; + } + } else { + array_push($this->stack_of_open_blocks, $name); + } - // Let's try to parse them as JSON. - $json_maybe = substr( $text, $at ); - $attributes = json_decode( $json_maybe, true ); - if ( null === $attributes || ! is_array( $attributes ) ) { - // This comment looked like a block comment, but the attributes didn't - // parse as a JSON array. This means it wasn't a block after all. return true; + } finally { + $this->in_next_token = false; } - - // We have a block name and a valid attributes array. We may not find a block - // closer, but let's assume is a block and process it as such. - // @TODO: Confirm that WordPress block parser would have parsed this as a block. 
- $this->block_name = $name; - $this->block_attributes = $attributes; - - return true; } public function get_updated_html(): string { diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Url_Processor.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Url_Processor.php index d5075c8ae8..786c646562 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Url_Processor.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Url_Processor.php @@ -18,10 +18,11 @@ class WP_Block_Markup_Url_Processor extends WP_Block_Markup_Processor { private $url_in_text_node_updated; private $inspected_url_attribute_idx = - 1; - public function __construct( $html, $base_url_string = null ) { - parent::__construct( $html ); - $this->base_url_string = $base_url_string; - $this->base_url_object = $base_url_string ? WP_URL::parse( $base_url_string ) : null; + public static function create_from_html( $html, $base_url_string = null ) { + $processor = static::create_fragment( $html ); + $processor->base_url_string = $base_url_string; + $processor->base_url_object = $base_url_string ? 
WP_URL::parse( $base_url_string ) : null; + return $processor; } public function get_updated_html(): string { diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php index fcbcd70133..629c879219 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php @@ -17,6 +17,7 @@ class WP_Directory_Tree_Entity_Reader implements \Iterator { private $pending_files = array(); private $parent_ids = array(); private $next_post_id; + private $create_root_page; private $is_finished = false; private $entities_read_so_far = 0; private $allowed_extensions = array(); @@ -28,16 +29,24 @@ public static function create( $options ) { if ( ! isset( $options['root_dir'] ) ) { - throw new \Exception( 'Missing required options: root_dir' ); + _doing_it_wrong( __FUNCTION__, 'Missing required options: root_dir', '1.0.0' ); + return false; } if ( ! isset( $options['first_post_id'] ) ) { - throw new \Exception( 'Missing required options: first_post_id' ); + _doing_it_wrong( __FUNCTION__, 'Missing required options: first_post_id', '1.0.0' ); + return false; + } + if ( 1 === $options['first_post_id'] ) { + _doing_it_wrong( __FUNCTION__, 'First post ID must be greater than 1', '1.0.0' ); + return false; } if ( ! isset( $options['allowed_extensions'] ) ) { - throw new \Exception( 'Missing required options: allowed_extensions' ); + _doing_it_wrong( __FUNCTION__, 'Missing required options: allowed_extensions', '1.0.0' ); + return false; } if ( ! 
isset( $options['index_file_patterns'] ) ) { - throw new \Exception( 'Missing required options: index_file_patterns' ); + _doing_it_wrong( __FUNCTION__, 'Missing required options: index_file_patterns', '1.0.0' ); + return false; } /** * @TODO: Use `sub_entity_reader_factory` instead of `markup_converter_factory` @@ -46,7 +55,8 @@ public static function create( * from the files, not just the post_content. */ if ( ! isset( $options['markup_converter_factory'] ) ) { - throw new \Exception( 'Missing required options: markup_converter_factory' ); + _doing_it_wrong( __FUNCTION__, 'Missing required options: markup_converter_factory', '1.0.0' ); + return false; } return new self( $filesystem, $options ); } @@ -57,6 +67,7 @@ private function __construct( ) { $this->file_visitor = new \WordPress\Filesystem\WP_Filesystem_Visitor( $filesystem, $options['root_dir'] ); $this->filesystem = $filesystem; + $this->create_root_page = $options['create_root_page'] ?? false; $this->next_post_id = $options['first_post_id']; $this->allowed_extensions = $options['allowed_extensions']; $this->index_file_patterns = $options['index_file_patterns']; @@ -70,66 +81,69 @@ public function next_entity() { $depth = $this->file_visitor->get_current_depth(); $parent_id = $this->parent_ids[ $depth - 1 ] ?? null; - if ( null === $parent_id && $depth > 1 ) { - // There's no parent ID even though we're a few levels deep. - // This is a scenario where `next_file()` skipped a few levels - // of directories with no relevant content in them: - // - // - /docs/ - // - /foo/ - // - /bar/ - // - /baz.md - // - // In this case, we need to backtrack and create the missing - // parent pages for /bar/ and /foo/. - - // Find the topmost missing parent ID - $missing_parent_id_depth = 1; - while ( isset( $this->parent_ids[ $missing_parent_id_depth ] ) ) { - ++$missing_parent_id_depth; + // Don't create a parent page for the root directory. 
+ if ( $depth > 1 || $this->create_root_page ) { + if ( null === $parent_id && $depth > 1 ) { + // There's no parent ID even though we're a few levels deep. + // This is a scenario where `next_file()` skipped a few levels + // of directories with no relevant content in them: + // + // - /docs/ + // - /foo/ + // - /bar/ + // - /baz.md + // + // In this case, we need to backtrack and create the missing + // parent pages for /bar/ and /foo/. + + // Find the topmost missing parent ID + $missing_parent_id_depth = 1; + while ( isset( $this->parent_ids[ $missing_parent_id_depth ] ) ) { + ++$missing_parent_id_depth; + } + + // Move up to the corresponding directory + $missing_parent_path = $dir; + for ( $i = $missing_parent_id_depth; $i < $depth; $i++ ) { + $missing_parent_path = dirname( $missing_parent_path ); + } + + $this->parent_ids[ $missing_parent_id_depth ] = $this->emit_post_entity( + array( + 'content' => '', + 'source_path' => $missing_parent_path, + 'parent_id' => $this->parent_ids[ $missing_parent_id_depth - 1 ] ?? null, + 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $missing_parent_path ) ), + ) + ); + } elseif ( false === $this->pending_directory_index ) { + // No directory index candidate – let's create a fake page + // just to have something in the page tree. + $this->parent_ids[ $depth ] = $this->emit_post_entity( + array( + 'content' => '', + 'source_path' => $dir, + 'parent_id' => $parent_id, + 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $dir ) ), + ) + ); + // We're no longer looking for a directory index. 
+ $this->pending_directory_index = null; + } else { + $file_path = $this->pending_directory_index; + $this->parent_ids[ $depth ] = $this->emit_post_entity( + array( + 'content' => $this->filesystem->read_file( $file_path ), + 'source_path' => $file_path, + 'parent_id' => $parent_id, + 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $file_path ) ), + ) + ); + // We're no longer looking for a directory index. + $this->pending_directory_index = null; } - - // Move up to the corresponding directory - $missing_parent_path = $dir; - for ( $i = $missing_parent_id_depth; $i < $depth; $i++ ) { - $missing_parent_path = dirname( $missing_parent_path ); - } - - $this->parent_ids[ $missing_parent_id_depth ] = $this->emit_post_entity( - array( - 'content' => '', - 'source_path' => $missing_parent_path, - 'parent_id' => $this->parent_ids[ $missing_parent_id_depth - 1 ], - 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $missing_parent_path ) ), - ) - ); - } elseif ( false === $this->pending_directory_index ) { - // No directory index candidate – let's create a fake page - // just to have something in the page tree. - $this->parent_ids[ $depth ] = $this->emit_post_entity( - array( - 'content' => '', - 'source_path' => $dir, - 'parent_id' => $parent_id, - 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $dir ) ), - ) - ); - // We're no longer looking for a directory index. - $this->pending_directory_index = null; - } else { - $file_path = $this->pending_directory_index; - $this->parent_ids[ $depth ] = $this->emit_post_entity( - array( - 'content' => $this->filesystem->read_file( $file_path ), - 'source_path' => $file_path, - 'parent_id' => $parent_id, - 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $file_path ) ), - ) - ); - // We're no longer looking for a directory index. 
- $this->pending_directory_index = null; + return true; } - return true; } while ( count( $this->pending_files ) ) { @@ -168,7 +182,7 @@ protected function emit_post_entity( $options ) { if ( ! $post_title ) { $removed_title = WP_Import_Utils::remove_first_h1_block_from_block_markup( $block_markup ); if ( false !== $removed_title ) { - $post_title = $removed_title['title']; + $post_title = $removed_title['h1_content']; $block_markup = $removed_title['remaining_html']; } } diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php index 44166b0f2a..e83e2aaa9e 100644 --- a/packages/playground/data-liberation/src/functions.php +++ b/packages/playground/data-liberation/src/functions.php @@ -38,7 +38,7 @@ function wp_rewrite_urls( $options ) { ); } - $p = new WP_Block_Markup_Url_Processor( $options['block_markup'], $options['base_url'] ); + $p = WP_Block_Markup_Url_Processor::create_from_html( $options['block_markup'], $options['base_url'] ); while ( $p->next_url() ) { $parsed_url = $p->get_parsed_url(); foreach ( $url_mapping as $mapping ) { diff --git a/packages/playground/data-liberation/src/import/WP_Import_Utils.php b/packages/playground/data-liberation/src/import/WP_Import_Utils.php index 91761e4223..77e29a2870 100644 --- a/packages/playground/data-liberation/src/import/WP_Import_Utils.php +++ b/packages/playground/data-liberation/src/import/WP_Import_Utils.php @@ -86,7 +86,7 @@ public static function remove_first_h1_block_from_block_markup( $html ) { } return array( - 'content' => trim( $title ), + 'h1_content' => trim( $title ), 'remaining_html' => substr( $html, $p->get_string_index_after_current_token() diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 77ebd09c00..41a1b2c4c9 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ 
b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -421,7 +421,7 @@ protected function index_next_entities( $count = 10000 ) { $this->indexed_assets_urls[ $data['attachment_url'] ] = true; } elseif ( isset( $data['post_content'] ) ) { $post = $data; - $p = new WP_Block_Markup_Url_Processor( $post['post_content'], $this->source_site_url ); + $p = WP_Block_Markup_Url_Processor::create_from_html( $post['post_content'], $this->source_site_url ); while ( $p->next_url() ) { if ( ! $this->url_processor_matched_asset_url( $p ) ) { continue; @@ -588,7 +588,7 @@ protected function frontload_next_entity() { $this->enqueue_attachment_download( $data['attachment_url'] ); } elseif ( isset( $data['post_content'] ) ) { $post = $data; - $p = new WP_Block_Markup_Url_Processor( $post['post_content'], $this->source_site_url ); + $p = WP_Block_Markup_Url_Processor::create_from_html( $post['post_content'], $this->source_site_url ); while ( $p->next_url() ) { if ( ! $this->url_processor_matched_asset_url( $p ) ) { continue; @@ -648,7 +648,7 @@ protected function import_next_entity() { if ( ! isset( $data[ $key ] ) ) { continue; } - $p = new WP_Block_Markup_Url_Processor( $data[ $key ], $this->source_site_url ); + $p = WP_Block_Markup_Url_Processor::create_from_html( $data[ $key ], $this->source_site_url ); while ( $p->next_url() ) { // Relative URLs are okay at this stage. if ( ! 
$p->get_raw_url() ) { diff --git a/packages/playground/data-liberation/tests/WPBlockMarkupUrlProcessorTests.php b/packages/playground/data-liberation/tests/WPBlockMarkupUrlProcessorTests.php index abab68466a..13e51efd3a 100644 --- a/packages/playground/data-liberation/tests/WPBlockMarkupUrlProcessorTests.php +++ b/packages/playground/data-liberation/tests/WPBlockMarkupUrlProcessorTests.php @@ -7,7 +7,7 @@ class WPBlockMarkupUrlProcessorTests extends TestCase public function test_next_url_in_current_token_returns_false_when_no_url_is_found() { - $p = new WP_Block_Markup_Url_Processor('Text without URLs'); + $p = WP_Block_Markup_Url_Processor::create_from_html('Text without URLs'); $this->assertFalse( $p->next_url_in_current_token() ); } @@ -17,7 +17,7 @@ public function test_next_url_in_current_token_returns_false_when_no_url_is_foun */ public function test_next_url_finds_the_url($url, $markup, $base_url='https://wordpress.org') { - $p = new WP_Block_Markup_Url_Processor($markup, $base_url); + $p = WP_Block_Markup_Url_Processor::create_from_html($markup, $base_url); $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); $this->assertEquals($url, $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.'); } @@ -77,7 +77,7 @@ static public function provider_test_finds_next_url() public function test_next_url_returns_false_once_theres_no_more_urls( ) { $markup = ''; - $p = new WP_Block_Markup_Url_Processor( $markup ); + $p = WP_Block_Markup_Url_Processor::create_from_html( $markup ); $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); $this->assertFalse( $p->next_url(), 'Found more URLs than expected.' 
); @@ -85,7 +85,7 @@ public function test_next_url_returns_false_once_theres_no_more_urls( ) { public function test_next_url_finds_urls_in_multiple_attributes( ) { $markup = ''; - $p = new WP_Block_Markup_Url_Processor( $markup ); + $p = WP_Block_Markup_Url_Processor::create_from_html( $markup ); $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); $this->assertEquals( 'https://first-url.org', $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' ); @@ -95,7 +95,7 @@ public function test_next_url_finds_urls_in_multiple_attributes( ) { public function test_next_url_finds_urls_in_multiple_tags( ) { $markup = '
'; - $p = new WP_Block_Markup_Url_Processor( $markup ); + $p = WP_Block_Markup_Url_Processor::create_from_html( $markup ); $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); $this->assertEquals( 'https://first-url.org', $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' ); @@ -112,7 +112,7 @@ public function test_next_url_finds_urls_in_multiple_tags( ) { */ public function test_set_url($markup, $new_url, $new_markup) { - $p = new WP_Block_Markup_Url_Processor($markup); + $p = WP_Block_Markup_Url_Processor::create_from_html($markup); $this->assertTrue($p->next_url(), 'Failed to find the URL in the markup.'); $this->assertTrue($p->set_raw_url($new_url), 'Failed to set the URL in the markup.'); $this->assertEquals($new_markup, $p->get_updated_html(), 'Failed to set the URL in the markup.'); @@ -141,7 +141,7 @@ static public function provider_test_set_url_examples() public function test_set_url_complex_test_case() { - $p = new WP_Block_Markup_Url_Processor( + $p = WP_Block_Markup_Url_Processor::create_from_html( << @@ -189,7 +189,7 @@ public function test_set_url_complex_test_case() } public function test_next_url_replace_the_url_for_simple_text() { - $p = new WP_Block_Markup_Url_Processor( + $p = WP_Block_Markup_Url_Processor::create_from_html( 'https://example.com/test/?page_id=1', 'https://example.com/' ); From eab1317a17486374a97bc1e7b89f511d3e8b3e54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Wed, 18 Dec 2024 11:56:12 +0100 Subject: [PATCH 14/26] Improve conversion to markdown --- .../data-liberation-markdown/plugin.php | 5 +++ .../src/WP_Blocks_To_Markdown.php | 34 ++++++++++---- .../WP_Block_Markup_Processor.php | 44 ++++++++++++++----- .../WP_Directory_Tree_Entity_Reader.php | 6 +++ 4 files changed, 68 insertions(+), 21 deletions(-) create mode 100644 packages/playground/data-liberation-markdown/plugin.php diff --git a/packages/playground/data-liberation-markdown/plugin.php 
b/packages/playground/data-liberation-markdown/plugin.php new file mode 100644 index 0000000000..fdea9ae81e --- /dev/null +++ b/packages/playground/data-liberation-markdown/plugin.php @@ -0,0 +1,5 @@ +markdown .= "* $line\n"; } } + $this->markdown .= "\n"; break; case 'wp:list-item': $this->markdown .= $this->skip_and_convert_inner_html() . "\n"; break; + case 'wp:group': + // Ignore group blocks and process their inner blocks as if + // the group didn't exist. + break; case 'wp:code': $code = $this->skip_and_convert_inner_html(); $language = $this->blocks->get_block_attribute('language') ?? ''; - $fence = str_repeat('`', max(3, $this->longest_sequence_of($code, '`') + 1)); - $this->markdown .= "$fence$language\n$code\n$fence\n\n"; + $this->markdown .= $this->wrap_in_code_fence($code, $language); break; case 'wp:image': $alt = $this->blocks->get_block_attribute('alt') ?? ''; @@ -99,16 +103,23 @@ private function handle_block_comment() { break; default: $code = ''; - $code .= ''; - $code .= $this->skip_and_convert_inner_html(); - $code .= ''; - $language = 'block'; - $fence = str_repeat('`', max(3, $this->longest_sequence_of($code, '`') + 1)); - $this->markdown .= "$fence$language\n$code\n$fence\n\n"; + if($this->blocks->is_self_closing_block()) { + $code .= ''; + } else { + $code .= '' . "\n"; + $code .= trim($this->skip_and_convert_inner_html()) . "\n"; + $code .= ''; + } + $this->markdown .= $this->wrap_in_code_fence($code, 'block'); break; } } + private function wrap_in_code_fence($code, $language = '') { + $fence = str_repeat('`', max(3, $this->longest_sequence_of($code, '`') + 1)); + return "$fence$language\n$code\n$fence\n\n"; + } + private function handle_tag() { $prefix = $this->blocks->is_tag_closer() ? '-' : '+'; $event = $prefix . 
$this->blocks->get_tag(); @@ -155,8 +166,13 @@ private function handle_tag() { } private function skip_and_convert_inner_html() { + // It's important we call get_block_breadcrumbs() before + // calling skip_and_get_block_inner_html() because the + // latter will get to the block closer and pop the block + // we've just entered from the stack. + $breadcrumbs_inside_block = $this->get_block_breadcrumbs(); $html = $this->blocks->skip_and_get_block_inner_html(); - $converter = new WP_Blocks_To_Markdown($html, [], $this->get_block_breadcrumbs()); + $converter = new WP_Blocks_To_Markdown($html, [], $breadcrumbs_inside_block); $converter->convert(); return $converter->get_markdown(); } diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php index 9eab194b3b..81a9ad08c1 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php @@ -17,6 +17,7 @@ class WP_Block_Markup_Processor extends WP_HTML_Processor { protected $block_attributes; private $block_attributes_updated; private $block_closer; + private $self_closing_flag; private $stack_of_open_blocks = array(); private $last_block_error; @@ -126,6 +127,10 @@ public function is_block_closer() { return $this->block_name !== null && $this->block_closer === true; } + public function is_self_closing_block() { + return $this->block_name !== null && $this->self_closing_flag === true; + } + private $in_next_token = false; public function next_token(): bool { // Prevent running next_token() logic twice when the parent method @@ -140,6 +145,7 @@ public function next_token(): bool { $this->block_name = null; $this->block_attributes = null; $this->block_closer = false; + $this->self_closing_flag = false; $this->block_attributes_updated = false; while ( true ) { @@ -207,27 +213,41 @@ public 
function next_token(): bool { $name = substr( $text, $name_starts_at, $name_length + 3 ); $at += $name_length; + // Assume no attributes by default. + $attributes = array(); + // Skip the whitespace that follows the block name. $at += strspn( $text, ' \t\f\r\n', $at ); if ( $at < strlen( $text ) ) { - // It may be a block with attributes... + // It may be a self-closing block or a block with attributes. - // ...but block closers cannot have attributes. + // However, block closers can be neither – let's short-circuit. if ( $this->block_closer ) { return true; } + // The rest of the comment can only consist of block attributes + // and an optional solidus character. + $rest = trim( substr( $text, $at ) ); + $at = strlen( $text ); + + // Inspect our potential JSON for the self-closing solidus (`/`) character. + $json_maybe = $rest; + if ( substr( $json_maybe, -1 ) === '/' ) { + // Self-closing block () + $this->self_closing_flag = true; + $json_maybe = substr( $json_maybe, 0, -1 ); + } + // Let's try to parse attributes as JSON. - $json_maybe = substr( $text, $at ); - $attributes = json_decode( $json_maybe, true ); - if ( null === $attributes || ! is_array( $attributes ) ) { - // This comment looked like a block comment, but the attributes didn't - // parse as a JSON array. This means it wasn't a block after all. - return true; + if( strlen( $json_maybe ) > 0 ) { + $attributes = json_decode( $json_maybe, true ); + if ( null === $attributes || ! is_array( $attributes ) ) { + // This comment looked like a block comment, but the attributes didn't + // parse as a JSON array. This means it wasn't a block after all. + return true; + } } - } else { - // This is a block without attributes. - $attributes = array(); } // We have a block name and a valid attributes array. 
We may not find a block @@ -242,7 +262,7 @@ public function next_token(): bool { $this->last_block_error = sprintf('Block closer %s does not match the last opened block %s.', $name, $popped); return false; } - } else { + } else if (!$this->self_closing_flag) { array_push($this->stack_of_open_blocks, $name); } diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php index 629c879219..9d4ca431e2 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php @@ -345,7 +345,13 @@ public function key(): int { return $this->entities_read_so_far - 1; } + private $is_started = false; + public function valid(): bool { + if ( ! $this->is_started ) { + $this->next(); + $this->is_started = true; + } return ! $this->is_finished; } From 1cb383a76911a99af04170dc09d5bf8d91faf275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Thu, 19 Dec 2024 14:14:35 +0100 Subject: [PATCH 15/26] Unit test the directory reader commit the missing files --- .../blueprint.json | 23 +++ .../plugin.php | 188 ++++++++++++++++++ .../run.sh | 9 + .../playground/data-liberation/bootstrap.php | 1 + .../playground/data-liberation/phpunit.xml | 1 + .../WP_Directory_Tree_Entity_Reader.php | 126 ++++++------ .../WPDirectoryTreeEntityReaderTests.php | 71 +++++++ .../nested/page1.html | 2 + .../directory-tree-entity-reader/root.html | 2 + 9 files changed, 360 insertions(+), 63 deletions(-) create mode 100644 packages/playground/data-liberation-static-files-editor/blueprint.json create mode 100644 packages/playground/data-liberation-static-files-editor/plugin.php create mode 100644 packages/playground/data-liberation-static-files-editor/run.sh create mode 100644 
packages/playground/data-liberation/tests/WPDirectoryTreeEntityReaderTests.php create mode 100644 packages/playground/data-liberation/tests/fixtures/directory-tree-entity-reader/nested/page1.html create mode 100644 packages/playground/data-liberation/tests/fixtures/directory-tree-entity-reader/root.html diff --git a/packages/playground/data-liberation-static-files-editor/blueprint.json b/packages/playground/data-liberation-static-files-editor/blueprint.json new file mode 100644 index 0000000000..0e3b539bdf --- /dev/null +++ b/packages/playground/data-liberation-static-files-editor/blueprint.json @@ -0,0 +1,23 @@ +{ + "$schema": "../blueprints/public/blueprint-schema.json", + "login": true, + "constants": { + "WP_DEBUG": true, + "WP_DEBUG_LOG": true, + "WP_DEBUG_DISPLAY": true + }, + "steps": [ + { + "step": "activatePlugin", + "pluginPath": "data-liberation/plugin.php" + }, + { + "step": "activatePlugin", + "pluginPath": "z-data-liberation-markdown/plugin.php" + }, + { + "step": "activatePlugin", + "pluginPath": "z-data-liberation-static-files-editor/plugin.php" + } + ] +} diff --git a/packages/playground/data-liberation-static-files-editor/plugin.php b/packages/playground/data-liberation-static-files-editor/plugin.php new file mode 100644 index 0000000000..110e10e99d --- /dev/null +++ b/packages/playground/data-liberation-static-files-editor/plugin.php @@ -0,0 +1,188 @@ + WP_STATIC_CONTENT_DIR, + 'create_root_page' => true, + 'first_post_id' => 2, + 'allowed_extensions' => array( 'md' ), + 'index_file_patterns' => array( '#^index\.md$#' ), + 'markup_converter_factory' => function( $content ) { + return new WP_Markdown_To_Blocks( $content ); + }, + ) + ); + }, + array(), + null + ); + + $import_session = WP_Import_Session::create( + array( + 'data_source' => 'static_pages', + 'importer' => $importer, + ) + ); + + data_liberation_import_step( $import_session, $importer ); + + self::$importing = false; + } + + /** + * Resets the database to a clean state. 
+ * + * @TODO: Make it work with MySQL, right now it uses SQLite-specific code. + */ + static private function reset_db_data() { + $GLOBALS['@pdo']->query('DELETE FROM wp_posts WHERE id > 0'); + $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_posts'"); + + $GLOBALS['@pdo']->query('DELETE FROM wp_postmeta WHERE post_id > 1'); + $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=20 WHERE NAME='wp_postmeta'"); + + $GLOBALS['@pdo']->query('DELETE FROM wp_comments'); + $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_comments'"); + + $GLOBALS['@pdo']->query('DELETE FROM wp_commentmeta'); + $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_commentmeta'"); + } + + /** + * Recreate the entire file structure when any post is saved. + * + * Why recreate? + * + * It's easier to recreate the entire file structure than to keep track of + * which files have been added, deleted, renamed and moved under + * another parent, or changed via a direct SQL query. + */ + static public function on_save_post($post_id) { + // Prevent collisions between the initial create_db_pages_from_html_files call + // process and the save_post_page hook. + if (self::$importing) { + return; + } + + self::deltree(WP_STATIC_CONTENT_DIR); + mkdir(WP_STATIC_CONTENT_DIR); + self::save_db_pages_as_html(WP_STATIC_CONTENT_DIR); + } + + static private function save_db_pages_as_html($path, $parent_id = 0) { + if (!file_exists($path)) { + mkdir($path, 0777, true); + } + + $args = array( + 'post_type' => 'page', + 'posts_per_page' => -1, + 'post_parent' => $parent_id, + 'post_status' => 'publish', + ); + $pages = new WP_Query($args); + + if ($pages->have_posts()) { + while ($pages->have_posts()) { + $pages->the_post(); + $page_id = get_the_ID(); + $page = get_post($page_id); + $title = sanitize_title(get_the_title()); + + // $content = '

' . esc_html(get_the_title()) . "

\n\n" . get_the_content(); + + $converter = new WP_Blocks_To_Markdown( + $page->post_content, + array( + 'title' => get_the_title(), + ) + ); + $converter->convert(); + $content = $converter->get_markdown(); + + $child_pages = get_pages(array('child_of' => $page_id, 'post_type' => 'page')); + + if (!file_exists($path)) { + mkdir($path, 0777, true); + } + + if (!empty($child_pages)) { + $new_parent = $path . '/' . $page->menu_order . '_' . $title; + if (!file_exists($new_parent)) { + mkdir($new_parent, 0777, true); + } + // file_put_contents($new_parent . '/index.html', $content); + file_put_contents($new_parent . '/index.md', $content); + self::save_db_pages_as_html($new_parent, $page_id); + } else { + // file_put_contents($path . '/' . $page->menu_order . '_' . $title . '.html', $content); + file_put_contents($path . '/' . $page->menu_order . '_' . $title . '.md', $content); + } + } + } + wp_reset_postdata(); + } + + static private function deltree($path) { + if (!file_exists($path)) { + return; + } + + $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($path), RecursiveIteratorIterator::CHILD_FIRST); + foreach ($iterator as $file) { + /** @var SplFileInfo $file */ + if ($file->isDir()) { + rmdir($file->getRealPath()); + } else if($file->isFile()) { + unlink($file->getRealPath()); + } + } + + rmdir($path); + } +} + +WP_Static_Files_Editor_Plugin::register_hooks(); diff --git a/packages/playground/data-liberation-static-files-editor/run.sh b/packages/playground/data-liberation-static-files-editor/run.sh new file mode 100644 index 0000000000..d33cb2b3e1 --- /dev/null +++ b/packages/playground/data-liberation-static-files-editor/run.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +bun ../cli/src/cli.ts \ + server \ + --mount=../data-liberation-static-files-editor:/wordpress/wp-content/plugins/z-data-liberation-static-files-editor \ + --mount=../data-liberation-markdown:/wordpress/wp-content/plugins/z-data-liberation-markdown \ + 
--mount=../data-liberation:/wordpress/wp-content/plugins/data-liberation \ + --mount=./my-notes/workdir:/wordpress/wp-content/uploads/static-pages \ + --blueprint=./blueprint.json diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index dcfab2623f..aa0abab882 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -98,6 +98,7 @@ if ( ! function_exists( '_doing_it_wrong' ) ) { $GLOBALS['_doing_it_wrong_messages'] = array(); function _doing_it_wrong( $method, $message, $version ) { + throw new Exception( $message ); $GLOBALS['_doing_it_wrong_messages'][] = $message; } } diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index cd97c0ec90..4018322256 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -7,6 +7,7 @@ tests/WPHTMLToBlocksTests.php tests/WPHTMLEntityReaderTests.php tests/WPEPubEntityReaderTests.php + tests/WPDirectoryTreeEntityReaderTests.php tests/WPURLInTextProcessorTests.php tests/WPBlockMarkupProcessorTests.php tests/WPBlockMarkupUrlProcessorTests.php diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php index 9d4ca431e2..7a578f9538 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php @@ -17,7 +17,7 @@ class WP_Directory_Tree_Entity_Reader implements \Iterator { private $pending_files = array(); private $parent_ids = array(); private $next_post_id; - private $create_root_page; + private $create_index_pages; private $is_finished = false; private $entities_read_so_far = 0; private $allowed_extensions = array(); @@ 
-67,7 +67,7 @@ private function __construct( ) { $this->file_visitor = new \WordPress\Filesystem\WP_Filesystem_Visitor( $filesystem, $options['root_dir'] ); $this->filesystem = $filesystem; - $this->create_root_page = $options['create_root_page'] ?? false; + $this->create_index_pages = $options['create_index_pages'] ?? false; $this->next_post_id = $options['first_post_id']; $this->allowed_extensions = $options['allowed_extensions']; $this->index_file_patterns = $options['index_file_patterns']; @@ -81,69 +81,66 @@ public function next_entity() { $depth = $this->file_visitor->get_current_depth(); $parent_id = $this->parent_ids[ $depth - 1 ] ?? null; - // Don't create a parent page for the root directory. - if ( $depth > 1 || $this->create_root_page ) { - if ( null === $parent_id && $depth > 1 ) { - // There's no parent ID even though we're a few levels deep. - // This is a scenario where `next_file()` skipped a few levels - // of directories with no relevant content in them: - // - // - /docs/ - // - /foo/ - // - /bar/ - // - /baz.md - // - // In this case, we need to backtrack and create the missing - // parent pages for /bar/ and /foo/. - - // Find the topmost missing parent ID - $missing_parent_id_depth = 1; - while ( isset( $this->parent_ids[ $missing_parent_id_depth ] ) ) { - ++$missing_parent_id_depth; - } - - // Move up to the corresponding directory - $missing_parent_path = $dir; - for ( $i = $missing_parent_id_depth; $i < $depth; $i++ ) { - $missing_parent_path = dirname( $missing_parent_path ); - } - - $this->parent_ids[ $missing_parent_id_depth ] = $this->emit_post_entity( - array( - 'content' => '', - 'source_path' => $missing_parent_path, - 'parent_id' => $this->parent_ids[ $missing_parent_id_depth - 1 ] ?? 
null, - 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $missing_parent_path ) ), - ) - ); - } elseif ( false === $this->pending_directory_index ) { - // No directory index candidate – let's create a fake page - // just to have something in the page tree. - $this->parent_ids[ $depth ] = $this->emit_post_entity( - array( - 'content' => '', - 'source_path' => $dir, - 'parent_id' => $parent_id, - 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $dir ) ), - ) - ); - // We're no longer looking for a directory index. - $this->pending_directory_index = null; - } else { - $file_path = $this->pending_directory_index; - $this->parent_ids[ $depth ] = $this->emit_post_entity( - array( - 'content' => $this->filesystem->read_file( $file_path ), - 'source_path' => $file_path, - 'parent_id' => $parent_id, - 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $file_path ) ), - ) - ); - // We're no longer looking for a directory index. - $this->pending_directory_index = null; + if ( null === $parent_id && $depth > 1 ) { + // There's no parent ID even though we're a few levels deep. + // This is a scenario where `next_file()` skipped a few levels + // of directories with no relevant content in them: + // + // - /docs/ + // - /foo/ + // - /bar/ + // - /baz.md + // + // In this case, we need to backtrack and create the missing + // parent pages for /bar/ and /foo/. 
+ + // Find the topmost missing parent ID + $missing_parent_id_depth = 1; + while ( isset( $this->parent_ids[ $missing_parent_id_depth ] ) ) { + ++$missing_parent_id_depth; } - return true; + + // Move up to the corresponding directory + $missing_parent_path = $dir; + for ( $i = $missing_parent_id_depth; $i < $depth; $i++ ) { + $missing_parent_path = dirname( $missing_parent_path ); + } + + $this->parent_ids[ $missing_parent_id_depth ] = $this->emit_post_entity( + array( + 'content' => '', + 'source_path' => $missing_parent_path, + 'parent_id' => $this->parent_ids[ $missing_parent_id_depth - 1 ] ?? null, + 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $missing_parent_path ) ), + ) + ); + } elseif ( false === $this->pending_directory_index ) { + // No directory index candidate – let's create a fake page + // just to have something in the page tree. + $this->parent_ids[ $depth ] = $this->emit_post_entity( + array( + 'content' => '', + 'source_path' => $dir, + 'parent_id' => $parent_id, + 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $dir ) ), + ) + ); + // We're no longer looking for a directory index. + $this->pending_directory_index = null; + } else { + $file_path = $this->pending_directory_index; + $this->parent_ids[ $depth ] = $this->emit_post_entity( + array( + 'content' => $this->filesystem->read_file( $file_path ), + 'source_path' => $file_path, + 'parent_id' => $parent_id, + 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $file_path ) ), + ) + ); + // We're no longer looking for a directory index. + $this->pending_directory_index = null; } + return true; } while ( count( $this->pending_files ) ) { @@ -300,6 +297,9 @@ protected function choose_directory_index( $files ) { return $idx; } } + if ( ! 
$this->create_index_pages && count( $files ) > 0 ) { + return 0; + } return -1; } diff --git a/packages/playground/data-liberation/tests/WPDirectoryTreeEntityReaderTests.php b/packages/playground/data-liberation/tests/WPDirectoryTreeEntityReaderTests.php new file mode 100644 index 0000000000..1d9745679f --- /dev/null +++ b/packages/playground/data-liberation/tests/WPDirectoryTreeEntityReaderTests.php @@ -0,0 +1,71 @@ + __DIR__ . '/fixtures/directory-tree-entity-reader', + 'first_post_id' => 2, + 'create_index_pages' => true, + 'allowed_extensions' => ['html'], + 'index_file_patterns' => ['#root.html#'], + 'markup_converter_factory' => function($markup) { + return new WP_HTML_To_Blocks( WP_HTML_Processor::create_fragment( $markup ) ); + }, + ] + ); + $entities = []; + while ( $reader->next_entity() ) { + $entities[] = $reader->get_entity(); + } + $this->assertCount(3, $entities); + + // The root index page + $this->assertEquals(2, $entities[0]->get_data()['post_id']); + $this->assertEquals('Root', $entities[0]->get_data()['post_title']); + $this->assertEquals(null, $entities[0]->get_data()['post_parent']); + + $this->assertEquals(3, $entities[1]->get_data()['post_id']); + $this->assertEquals('Nested', $entities[1]->get_data()['post_title']); + $this->assertEquals(2, $entities[1]->get_data()['post_parent']); + + $this->assertEquals(4, $entities[2]->get_data()['post_id']); + $this->assertEquals('Page 1', $entities[2]->get_data()['post_title']); + $this->assertEquals(3, $entities[2]->get_data()['post_parent']); + } + + public function test_with_create_index_pages_false() { + $reader = WP_Directory_Tree_Entity_Reader::create( + new WordPress\Filesystem\WP_Filesystem(), + [ + 'root_dir' => __DIR__ . 
'/fixtures/directory-tree-entity-reader', + 'first_post_id' => 2, + 'create_index_pages' => false, + 'allowed_extensions' => ['html'], + 'index_file_patterns' => ['#root.html#'], + 'markup_converter_factory' => function($markup) { + return new WP_HTML_To_Blocks( WP_HTML_Processor::create_fragment( $markup ) ); + }, + ] + ); + $entities = []; + while ( $reader->next_entity() ) { + $entities[] = $reader->get_entity(); + } + $this->assertCount(2, $entities); + + // The root page + $this->assertEquals(2, $entities[0]->get_data()['post_id']); + $this->assertEquals('Root', $entities[0]->get_data()['post_title']); + $this->assertEquals(null, $entities[0]->get_data()['post_parent']); + + // The nested page + $this->assertEquals(3, $entities[1]->get_data()['post_id']); + $this->assertEquals('Page 1', $entities[1]->get_data()['post_title']); + $this->assertEquals(2, $entities[1]->get_data()['post_parent']); + } +} diff --git a/packages/playground/data-liberation/tests/fixtures/directory-tree-entity-reader/nested/page1.html b/packages/playground/data-liberation/tests/fixtures/directory-tree-entity-reader/nested/page1.html new file mode 100644 index 0000000000..a76ff59751 --- /dev/null +++ b/packages/playground/data-liberation/tests/fixtures/directory-tree-entity-reader/nested/page1.html @@ -0,0 +1,2 @@ +

Page 1

+

This is page 1.

diff --git a/packages/playground/data-liberation/tests/fixtures/directory-tree-entity-reader/root.html b/packages/playground/data-liberation/tests/fixtures/directory-tree-entity-reader/root.html new file mode 100644 index 0000000000..5666bc9ad6 --- /dev/null +++ b/packages/playground/data-liberation/tests/fixtures/directory-tree-entity-reader/root.html @@ -0,0 +1,2 @@ +

Root

+

This is the root page.

From 273fb8f965f7c0589df5079ab5f8ed7542647ac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Thu, 19 Dec 2024 14:56:36 +0100 Subject: [PATCH 16/26] Split out WP_Filesystem_To_Post_Hierarchy to enable turning each file into a stream of entities --- .../playground/data-liberation/bootstrap.php | 1 + .../playground/data-liberation/phpunit.xml | 1 + .../WP_Block_Markup_Processor.php | 46 ++-- .../WP_Block_Markup_Url_Processor.php | 2 +- .../WP_Filesystem_To_Post_Hierarchy.php | 253 ++++++++++++++++++ .../entity-readers/WP_HTML_Entity_Reader.php | 4 +- .../WPFilesystemToPostHierarchyTests.php | 41 +++ 7 files changed, 322 insertions(+), 26 deletions(-) create mode 100644 packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php create mode 100644 packages/playground/data-liberation/tests/WPFilesystemToPostHierarchyTests.php diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index aa0abab882..912478a6f6 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -64,6 +64,7 @@ require_once __DIR__ . '/src/entity-readers/WP_EPub_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_WXR_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_Directory_Tree_Entity_Reader.php'; +require_once __DIR__ . '/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php'; require_once __DIR__ . '/src/xml-api/WP_XML_Decoder.php'; require_once __DIR__ . 
'/src/xml-api/WP_XML_Processor.php'; diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index 4018322256..c99ffcf858 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -8,6 +8,7 @@ tests/WPHTMLEntityReaderTests.php tests/WPEPubEntityReaderTests.php tests/WPDirectoryTreeEntityReaderTests.php + tests/WPFilesystemToPostHierarchyTests.php tests/WPURLInTextProcessorTests.php tests/WPBlockMarkupProcessorTests.php tests/WPBlockMarkupUrlProcessorTests.php diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php index 81a9ad08c1..c9fd47a02a 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Processor.php @@ -45,21 +45,21 @@ public function get_last_error(): ?string { } public function skip_and_get_block_inner_html() { - if('#block-comment' !== $this->get_token_type()) { + if ( '#block-comment' !== $this->get_token_type() ) { return false; } - if($this->is_block_closer()) { + if ( $this->is_block_closer() ) { return false; } - if(false === WP_HTML_Tag_Processor::set_bookmark('block-start')) { + if ( false === WP_HTML_Tag_Processor::set_bookmark( 'block-start' ) ) { return false; } $starting_block_depth = $this->get_block_depth(); - while($this->next_token()) { - if( + while ( $this->next_token() ) { + if ( $this->get_token_type() === '#block-comment' && $this->is_block_closer() && $this->get_block_depth() === $starting_block_depth - 1 @@ -68,16 +68,16 @@ public function skip_and_get_block_inner_html() { } } - if(false === WP_HTML_Tag_Processor::set_bookmark('block-end')) { - WP_HTML_Tag_Processor::release_bookmark('block-start'); + if ( false === WP_HTML_Tag_Processor::set_bookmark( 'block-end' ) ) { + 
WP_HTML_Tag_Processor::release_bookmark( 'block-start' ); return false; } $inner_html_start = $this->bookmarks['block-start']->start + $this->bookmarks['block-start']->length; - $inner_html_end = $this->bookmarks['block-end']->start - $inner_html_start; + $inner_html_end = $this->bookmarks['block-end']->start - $inner_html_start; - WP_HTML_Tag_Processor::release_bookmark('block-start'); - WP_HTML_Tag_Processor::release_bookmark('block-end'); + WP_HTML_Tag_Processor::release_bookmark( 'block-start' ); + WP_HTML_Tag_Processor::release_bookmark( 'block-end' ); return substr( $this->html, @@ -87,7 +87,7 @@ public function skip_and_get_block_inner_html() { } public function get_block_depth() { - return count($this->stack_of_open_blocks); + return count( $this->stack_of_open_blocks ); } public function get_block_breadcrumbs() { @@ -115,12 +115,12 @@ public function get_block_attributes() { return $this->block_attributes; } - public function get_block_attribute($attribute_name) { + public function get_block_attribute( $attribute_name ) { if ( null === $this->block_attributes ) { return false; } - return $this->block_attributes[$attribute_name] ?? false; + return $this->block_attributes[ $attribute_name ] ?? false; } public function is_block_closer() { @@ -135,7 +135,7 @@ public function is_self_closing_block() { public function next_token(): bool { // Prevent running next_token() logic twice when the parent method // makes recursive calls to itself. - if($this->in_next_token) { + if ( $this->in_next_token ) { return parent::next_token(); } $this->in_next_token = true; @@ -229,18 +229,18 @@ public function next_token(): bool { // The rest of the comment can only consist of block attributes // and an optional solidus character. $rest = trim( substr( $text, $at ) ); - $at = strlen( $text ); + $at = strlen( $text ); // Inspect our potential JSON for the self-closing solidus (`/`) character. 
$json_maybe = $rest; if ( substr( $json_maybe, -1 ) === '/' ) { // Self-closing block () $this->self_closing_flag = true; - $json_maybe = substr( $json_maybe, 0, -1 ); + $json_maybe = substr( $json_maybe, 0, -1 ); } // Let's try to parse attributes as JSON. - if( strlen( $json_maybe ) > 0 ) { + if ( strlen( $json_maybe ) > 0 ) { $attributes = json_decode( $json_maybe, true ); if ( null === $attributes || ! is_array( $attributes ) ) { // This comment looked like a block comment, but the attributes didn't @@ -256,14 +256,14 @@ public function next_token(): bool { $this->block_name = $name; $this->block_attributes = $attributes; - if($this->block_closer) { - $popped = array_pop($this->stack_of_open_blocks); - if($popped !== $name) { - $this->last_block_error = sprintf('Block closer %s does not match the last opened block %s.', $name, $popped); + if ( $this->block_closer ) { + $popped = array_pop( $this->stack_of_open_blocks ); + if ( $popped !== $name ) { + $this->last_block_error = sprintf( 'Block closer %s does not match the last opened block %s.', $name, $popped ); return false; } - } else if (!$this->self_closing_flag) { - array_push($this->stack_of_open_blocks, $name); + } elseif ( ! 
$this->self_closing_flag ) { + array_push( $this->stack_of_open_blocks, $name ); } return true; diff --git a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Url_Processor.php b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Url_Processor.php index 786c646562..06548ce66f 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Url_Processor.php +++ b/packages/playground/data-liberation/src/block-markup/WP_Block_Markup_Url_Processor.php @@ -19,7 +19,7 @@ class WP_Block_Markup_Url_Processor extends WP_Block_Markup_Processor { private $inspected_url_attribute_idx = - 1; public static function create_from_html( $html, $base_url_string = null ) { - $processor = static::create_fragment( $html ); + $processor = static::create_fragment( $html ); $processor->base_url_string = $base_url_string; $processor->base_url_object = $base_url_string ? WP_URL::parse( $base_url_string ) : null; return $processor; diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php new file mode 100644 index 0000000000..3e99e999a2 --- /dev/null +++ b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php @@ -0,0 +1,253 @@ +file_visitor = $file_visitor; + $this->create_index_pages = $options['create_index_pages'] ?? 
true; + $this->next_post_id = $options['first_post_id']; + $this->filter_pattern = $options['filter_pattern']; + $this->index_file_pattern = $options['index_file_pattern']; + } + + public function get_current_post() { + return $this->current_post; + } + + public function next_post() { + $this->current_post = null; + if ( $this->is_finished ) { + return false; + } + while ( true ) { + if ( null !== $this->pending_directory_index ) { + $dir = $this->file_visitor->get_event()->dir; + $depth = $this->file_visitor->get_current_depth(); + $parent_id = $this->parent_ids[ $depth - 1 ] ?? null; + + if ( null === $parent_id && $depth > 1 ) { + // There's no parent ID even though we're a few levels deep. + // This is a scenario where `next_file()` skipped a few levels + // of directories with no relevant content in them: + // + // - /docs/ + // - /foo/ + // - /bar/ + // - /baz.md + // + // In this case, we need to backtrack and create the missing + // parent pages for /bar/ and /foo/. + + // Find the topmost missing parent ID + $missing_parent_id_depth = 1; + while ( isset( $this->parent_ids[ $missing_parent_id_depth ] ) ) { + ++$missing_parent_id_depth; + } + + // Move up to the corresponding directory + $missing_parent_path = $dir; + for ( $i = $missing_parent_id_depth; $i < $depth; $i++ ) { + $missing_parent_path = dirname( $missing_parent_path ); + } + + $this->parent_ids[ $missing_parent_id_depth ] = $this->emit_object( + array( + 'type' => 'directory', + 'source_path' => $missing_parent_path, + 'parent_id' => $this->parent_ids[ $missing_parent_id_depth - 1 ] ?? null, + ) + ); + } elseif ( false === $this->pending_directory_index ) { + // No directory index candidate – let's create a fake page + // just to have something in the page tree. + $this->parent_ids[ $depth ] = $this->emit_object( + array( + 'type' => 'file_placeholder', + 'source_path' => $dir, + 'parent_id' => $parent_id, + ) + ); + // We're no longer looking for a directory index. 
+ $this->pending_directory_index = null; + } else { + $file_path = $this->pending_directory_index; + $this->parent_ids[ $depth ] = $this->emit_object( + array( + 'type' => 'file', + 'source_path' => $file_path, + 'parent_id' => $parent_id, + ) + ); + // We're no longer looking for a directory index. + $this->pending_directory_index = null; + } + return true; + } + + while ( count( $this->pending_files ) ) { + $parent_id = $this->parent_ids[ $this->file_visitor->get_current_depth() ] ?? null; + $file_path = array_shift( $this->pending_files ); + $this->emit_object( + array( + 'type' => 'file', + 'source_path' => $file_path, + 'parent_id' => $parent_id, + ) + ); + return true; + } + + if ( false === $this->next_file() ) { + break; + } + } + $this->is_finished = true; + return false; + } + + protected function emit_object( $options ) { + $post_id = $this->next_post_id; + ++$this->next_post_id; + $this->current_post = array_merge( + $options, + array( + 'post_id' => $post_id, + ) + ); + ++$this->entities_read_so_far; + return $post_id; + } + + private function next_file() { + $this->pending_files = array(); + while ( $this->file_visitor->next() ) { + $event = $this->file_visitor->get_event(); + + if ( $event->is_exiting() ) { + // Clean up stale IDs to save some memory when processing + // large directory trees. + unset( $this->parent_ids[ $event->dir ] ); + continue; + } + + if ( $event->is_entering() ) { + $abs_paths = array(); + foreach ( $event->files as $filename ) { + $abs_paths[] = $event->dir . '/' . $filename; + } + $this->pending_files = $this->choose_relevant_files( $abs_paths ); + if ( ! count( $this->pending_files ) ) { + // Only consider directories with relevant files in them. + // Otherwise we'll create fake pages for media directories + // and other directories that don't contain any content. 
+ // + // One corner case is when there are a few levels of directories + // with a single relevant file at the bottom: + // + // - /docs/ + // - /foo/ + // - /bar/ + // - /baz.md + // + // In this case, `next_post()` will backtrack at baz.md and + // create the missing parent pages. + continue; + } + $directory_index_idx = $this->choose_directory_index( $this->pending_files ); + if ( -1 === $directory_index_idx ) { + $this->pending_directory_index = false; + } else { + $this->pending_directory_index = $this->pending_files[ $directory_index_idx ]; + unset( $this->pending_files[ $directory_index_idx ] ); + } + return true; + } + + return false; + } + return false; + } + + protected function choose_directory_index( $files ) { + foreach ( $files as $idx => $file ) { + if ( $this->looks_like_directory_index( $file ) ) { + return $idx; + } + } + if ( ! $this->create_index_pages && count( $files ) > 0 ) { + return 0; + } + return -1; + } + + protected function looks_like_directory_index( $path ) { + return preg_match( $this->index_file_pattern, basename( $path ) ); + } + + protected function choose_relevant_files( $paths ) { + $filtered_paths = array(); + foreach ( $paths as $path ) { + if ( preg_match( $this->filter_pattern, $path ) ) { + $filtered_paths[] = $path; + } + } + return $filtered_paths; + } + + /** + * @TODO: Either implement this method, or introduce a concept of + * reentrant and non-reentrant entity readers. 
+ */ + public function get_reentrancy_cursor() { + return ''; + } +} diff --git a/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php index 67ceadad7d..92d47ac27f 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php @@ -26,11 +26,11 @@ public function next_entity() { // If we've already read some entities, skip to the next one. if ( null !== $this->entities ) { - if ( count( $this->entities ) <= 1 ) { + array_shift( $this->entities ); + if ( count( $this->entities ) === 0 ) { $this->finished = true; return false; } - array_shift( $this->entities ); return true; } diff --git a/packages/playground/data-liberation/tests/WPFilesystemToPostHierarchyTests.php b/packages/playground/data-liberation/tests/WPFilesystemToPostHierarchyTests.php new file mode 100644 index 0000000000..87240aaa2c --- /dev/null +++ b/packages/playground/data-liberation/tests/WPFilesystemToPostHierarchyTests.php @@ -0,0 +1,41 @@ + __DIR__ . 
'/fixtures/directory-tree-entity-reader', + 'first_post_id' => 2, + 'create_index_pages' => true, + 'filter_pattern' => '#\.html$#', + 'index_file_pattern' => '#root.html#', + ] + ); + $posts = []; + while ( $reader->next_post() ) { + $posts[] = $reader->get_current_post(); + } + $this->assertCount(3, $posts); + + // The root index page + // Root index page + $this->assertEquals(2, $posts[0]['post_id']); + $this->assertNull($posts[0]['parent_id']); + $this->assertEquals('file', $posts[0]['type']); + + // Nested directory page + $this->assertEquals(3, $posts[1]['post_id']); + $this->assertEquals(2, $posts[1]['parent_id']); + $this->assertEquals('directory', $posts[1]['type']); + + // Leaf page + $this->assertEquals(4, $posts[2]['post_id']); + $this->assertEquals(3, $posts[2]['parent_id']); + $this->assertEquals('file', $posts[2]['type']); + } + +} From 753993c8f4b28b13fd9c984b690459d4cca03174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Thu, 19 Dec 2024 14:57:26 +0100 Subject: [PATCH 17/26] Remove the unimplemented get_reentrancy_cursor method --- .../entity-readers/WP_Filesystem_To_Post_Hierarchy.php | 8 -------- 1 file changed, 8 deletions(-) diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php index 3e99e999a2..139f737f19 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php @@ -242,12 +242,4 @@ protected function choose_relevant_files( $paths ) { } return $filtered_paths; } - - /** - * @TODO: Either implement this method, or introduce a concept of - * reentrant and non-reentrant entity readers. 
- */ - public function get_reentrancy_cursor() { - return ''; - } } From daa1e87ee30d58dd2c19217576fe5a02eea60a53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Thu, 19 Dec 2024 15:19:22 +0100 Subject: [PATCH 18/26] Use a generic WP_Block_Markup_Entity_Reader for sourcing entities from disk files --- .../playground/data-liberation/bootstrap.php | 1 + .../WP_Block_Markup_Entity_Reader.php | 99 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index 912478a6f6..b92170ea40 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -63,6 +63,7 @@ require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_EPub_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_WXR_Entity_Reader.php'; +require_once __DIR__ . '/src/entity-readers/WP_Block_Markup_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_Directory_Tree_Entity_Reader.php'; require_once __DIR__ . 
'/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php'; diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php new file mode 100644 index 0000000000..87c53a4c82 --- /dev/null +++ b/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php @@ -0,0 +1,99 @@ +block_markup = $block_markup; + $this->metadata = $metadata; + $this->post_id = $post_id; + } + + public function next_entity() { + if ( $this->finished ) { + return false; + } + + $this->current_entity = null; + + if ( null !== $this->enqueued_entities ) { + if ( count( $this->enqueued_entities ) === 0 ) { + $this->finished = true; + return false; + } else { + $this->current_entity = array_shift( $this->enqueued_entities ); + return true; + } + } + + $all_metadata = $this->metadata; + $post_fields = array(); + $other_metadata = array(); + foreach ( $all_metadata as $key => $values ) { + if ( in_array( $key, WP_Imported_Entity::POST_FIELDS, true ) ) { + $post_fields[ $key ] = $values[0]; + } else { + $other_metadata[ $key ] = $values[0]; + } + } + + $post_fields['post_id'] = $this->post_id; + $post_fields['post_content'] = $this->block_markup; + + // In Markdown, the frontmatter title can be a worse title candidate than + // the first H1 block. In block markup exports, it will be the opposite. + // + // @TODO: Enable the API consumer to customize the title resolution. + if ( ! $post_fields['post_title'] ) { + $removed_title = WP_Import_Utils::remove_first_h1_block_from_block_markup( $post_fields['post_content'] ); + if ( false !== $removed_title ) { + $post_fields['post_title'] = $removed_title['h1_content']; + $post_fields['post_content'] = $removed_title['remaining_html']; + } + } + + // Yield the post entity. 
+ $this->enqueued_entities[] = new WP_Imported_Entity( 'post', $post_fields ); + + // Yield all the metadata that don't belong to the post entity. + foreach ( $other_metadata as $key => $value ) { + $this->enqueued_entities[] = new WP_Imported_Entity( + 'post_meta', + array( + 'post_id' => $this->post_id, + 'meta_key' => $key, + 'meta_value' => $value, + ) + ); + } + + $this->current_entity = array_shift( $this->enqueued_entities ); + return true; + } + + public function get_entity() { + if ( $this->is_finished() ) { + return false; + } + return $this->current_entity; + } + + public function is_finished(): bool { + return $this->finished; + } + + public function get_last_error(): ?string { + return $this->last_error; + } +} From 851ad045a2194efa3be456836c69b6a36fe5ca91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Thu, 19 Dec 2024 15:28:21 +0100 Subject: [PATCH 19/26] Adjust the used array keys --- .../src/entity-readers/WP_Block_Markup_Entity_Reader.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php index 87c53a4c82..17f450051c 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php @@ -55,7 +55,7 @@ public function next_entity() { // the first H1 block. In block markup exports, it will be the opposite. // // @TODO: Enable the API consumer to customize the title resolution. - if ( ! $post_fields['post_title'] ) { + if ( ! 
isset( $post_fields['post_title'] ) ) { $removed_title = WP_Import_Utils::remove_first_h1_block_from_block_markup( $post_fields['post_content'] ); if ( false !== $removed_title ) { $post_fields['post_title'] = $removed_title['h1_content']; From cbdbb8e735b9c571406b781c1cb4e6bdfa10c3a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 20 Dec 2024 01:43:41 +0100 Subject: [PATCH 20/26] Save data to the original static file path --- .../src/WP_Blocks_To_Markdown.php | 4 +- .../plugin.php | 84 +++++++----- .../playground/data-liberation/bootstrap.php | 3 +- .../src/entity-readers/WP_Entity_Reader.php | 11 +- .../WP_Filesystem_Entity_Reader.php | 122 ++++++++++++++++++ ...chy.php => WP_Filesystem_To_Post_Tree.php} | 16 +-- 6 files changed, 193 insertions(+), 47 deletions(-) create mode 100644 packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php rename packages/playground/data-liberation/src/entity-readers/{WP_Filesystem_To_Post_Hierarchy.php => WP_Filesystem_To_Post_Tree.php} (95%) diff --git a/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php b/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php index 40d43911e7..20a7c2c955 100644 --- a/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php +++ b/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php @@ -20,7 +20,7 @@ public function convert() { return true; } - public function get_markdown() { + public function get_result() { return $this->markdown; } @@ -174,7 +174,7 @@ private function skip_and_convert_inner_html() { $html = $this->blocks->skip_and_get_block_inner_html(); $converter = new WP_Blocks_To_Markdown($html, [], $breadcrumbs_inside_block); $converter->convert(); - return $converter->get_markdown(); + return $converter->get_result(); } private function longest_sequence_of($input, $substring) { diff --git 
a/packages/playground/data-liberation-static-files-editor/plugin.php b/packages/playground/data-liberation-static-files-editor/plugin.php index 110e10e99d..41380dce78 100644 --- a/packages/playground/data-liberation-static-files-editor/plugin.php +++ b/packages/playground/data-liberation-static-files-editor/plugin.php @@ -21,7 +21,7 @@ class WP_Static_Files_Editor_Plugin { static public function register_hooks() { register_activation_hook( __FILE__, array(self::class, 'import_static_pages') ); - add_action('save_post_page', array(self::class, 'on_save_post')); + add_action('save_post', array(self::class, 'on_save_post')); } /** @@ -42,18 +42,9 @@ static public function import_static_pages() { $importer = WP_Stream_Importer::create( function () { - return WP_Directory_Tree_Entity_Reader::create( + return new WP_Filesystem_Entity_Reader( new WP_Filesystem(), - array ( - 'root_dir' => WP_STATIC_CONTENT_DIR, - 'create_root_page' => true, - 'first_post_id' => 2, - 'allowed_extensions' => array( 'md' ), - 'index_file_patterns' => array( '#^index\.md$#' ), - 'markup_converter_factory' => function( $content ) { - return new WP_Markdown_To_Blocks( $content ); - }, - ) + WP_STATIC_CONTENT_DIR ); }, array(), @@ -107,12 +98,12 @@ static public function on_save_post($post_id) { return; } - self::deltree(WP_STATIC_CONTENT_DIR); + // self::deltree(WP_STATIC_CONTENT_DIR); mkdir(WP_STATIC_CONTENT_DIR); self::save_db_pages_as_html(WP_STATIC_CONTENT_DIR); } - static private function save_db_pages_as_html($path, $parent_id = 0) { + static private function save_db_pages_as_html($path, $parent_id = null) { if (!file_exists($path)) { mkdir($path, 0777, true); } @@ -130,18 +121,39 @@ static private function save_db_pages_as_html($path, $parent_id = 0) { $pages->the_post(); $page_id = get_the_ID(); $page = get_post($page_id); - $title = sanitize_title(get_the_title()); - - // $content = '

' . esc_html(get_the_title()) . "

\n\n" . get_the_content(); - - $converter = new WP_Blocks_To_Markdown( - $page->post_content, - array( - 'title' => get_the_title(), - ) + + $content_converter = get_post_meta($page_id, 'content_converter', true); + if(empty($content_converter)) { + $content_converter = 'md'; + } + + $title_block = ( + WP_Import_Utils::block_opener('heading', array('level' => 1)) . + '

' . esc_html(get_the_title()) . '

' . + WP_Import_Utils::block_closer('heading') ); - $converter->convert(); - $content = $converter->get_markdown(); + $block_markup = $title_block . $page->post_content; + + switch($content_converter) { + case 'html': + case 'xhtml': + // @TODO: Implement a Blocks to HTML converter. + break; + case 'md': + default: + $converter = new WP_Blocks_To_Markdown( + $block_markup, + array( + 'title' => get_the_title(), + ) + ); + if(false === $converter->convert()) { + // @TODO: error handling. + } + $content = $converter->get_result(); + break; + } + $child_pages = get_pages(array('child_of' => $page_id, 'post_type' => 'page')); @@ -149,17 +161,23 @@ static private function save_db_pages_as_html($path, $parent_id = 0) { mkdir($path, 0777, true); } + $source_path_relative = get_post_meta($page_id, 'source_path', true); + if(empty($source_path_relative)) { + $title = sanitize_title(get_the_title()); + $source_path_relative = $page->menu_order . '_' . $title . '.' . $content_converter; + } + $source_file_path = WP_STATIC_CONTENT_DIR . '/' . $source_path_relative; if (!empty($child_pages)) { - $new_parent = $path . '/' . $page->menu_order . '_' . $title; - if (!file_exists($new_parent)) { - mkdir($new_parent, 0777, true); + if(is_dir($source_file_path)) { + $dirname = $source_file_path; + } else { + $dirname = dirname($source_file_path); + mkdir($dirname, 0777, true); } - // file_put_contents($new_parent . '/index.html', $content); - file_put_contents($new_parent . '/index.md', $content); - self::save_db_pages_as_html($new_parent, $page_id); + file_put_contents($source_file_path . '/index.' . $content_converter, $content); + self::save_db_pages_as_html($dirname, $page_id); } else { - // file_put_contents($path . '/' . $page->menu_order . '_' . $title . '.html', $content); - file_put_contents($path . '/' . $page->menu_order . '_' . $title . 
'.md', $content); + file_put_contents($source_file_path, $content); } } } diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index b92170ea40..0c8168f3da 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -62,10 +62,11 @@ require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_EPub_Entity_Reader.php'; +require_once __DIR__ . '/src/entity-readers/WP_Filesystem_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_WXR_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_Block_Markup_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_Directory_Tree_Entity_Reader.php'; -require_once __DIR__ . '/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php'; +require_once __DIR__ . '/src/entity-readers/WP_Filesystem_To_Post_Tree.php'; require_once __DIR__ . '/src/xml-api/WP_XML_Decoder.php'; require_once __DIR__ . '/src/xml-api/WP_XML_Processor.php'; diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php index a45017fd0f..ba5246a9ca 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Entity_Reader.php @@ -60,9 +60,7 @@ public function get_reentrancy_cursor() { // The iterator interface: public function current(): object { - if ( null === $this->get_entity() && ! $this->is_finished() && ! $this->get_last_error() ) { - $this->next(); - } + $this->ensure_current_entity(); return $this->get_entity(); } @@ -78,6 +76,7 @@ public function key(): string { } public function valid(): bool { + $this->ensure_current_entity(); return false !== $this->last_next_result && ! 
$this->is_finished() && ! $this->get_last_error(); } @@ -92,4 +91,10 @@ public function rewind(): void { null ); } + + private function ensure_current_entity() { + if ( null === $this->get_entity() && ! $this->is_finished() && ! $this->get_last_error() ) { + $this->next(); + } + } } diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php new file mode 100644 index 0000000000..1ca0a0947f --- /dev/null +++ b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php @@ -0,0 +1,122 @@ +filesystem = $filesystem; + $this->post_tree = WP_Filesystem_To_Post_Tree::create( + $this->filesystem, + array ( + 'root_dir' => $root_dir, + 'first_post_id' => 2, + 'filter_pattern' => '#\.(?:md|html|xhtml)$#', + 'index_file_pattern' => '#^index\.[a-z]+$#', + ) + ); + } + + public function get_last_error(): ?string { + // @TODO: Implement this. + return null; + } + + public function get_entity() { + return $this->current_entity; + } + + public function is_finished(): bool { + return $this->finished; + } + + public function next_entity(): bool { + while(true) { + while(count($this->entities) > 0) { + $this->current_entity = array_shift( $this->entities ); + return true; + } + + if( ! 
$this->post_tree->next_node() ) { + $this->finished = true; + return false; + } + + $source_content_converter = null; + $post_tree_node = $this->post_tree->get_current_node(); + if($post_tree_node['type'] === 'file') { + $content = $this->filesystem->read_file($post_tree_node['source_path']); + $extension = pathinfo($post_tree_node['source_path'], PATHINFO_EXTENSION); + switch($extension) { + case 'md': + $converter = new WP_Markdown_To_Blocks( $content ); + $source_content_converter = 'md'; + break; + case 'xhtml': + $converter = new WP_HTML_To_Blocks( WP_XML_Processor::create_from_string( $content ) ); + $source_content_converter = 'xhtml'; + break; + case 'html': + default: + $converter = new WP_HTML_To_Blocks( WP_HTML_Processor::create_fragment( $content ) ); + $source_content_converter = 'html'; + break; + } + + if( false === $converter->convert() ) { + throw new Exception('Failed to convert Markdown to blocks'); + } + $markup = $converter->get_block_markup(); + $metadata = $converter->get_all_metadata(); + } else { + $markup = ''; + $metadata = array(); + // @TODO: Accept an option to set what should we default to. + $source_content_converter = 'html'; + } + + $reader = new WP_Block_Markup_Entity_Reader( + $markup, + $metadata, + $post_tree_node['post_id'] + ); + while($reader->next_entity()) { + $entity = $reader->get_entity(); + $data = $entity->get_data(); + if( $entity->get_type() === 'post' ) { + $data['id'] = $post_tree_node['post_id']; + $data['guid'] = $post_tree_node['source_path']; + $data['post_parent'] = $post_tree_node['parent_id']; + $data['post_title'] = $data['post_title'] ?? null; + $data['post_type'] = 'page'; + if ( ! 
$data['post_title'] ) { + $data['post_title'] = WP_Import_Utils::slug_to_title( basename( $post_tree_node['source_path'] ) ); + } + $entity = new WP_Imported_Entity( $entity->get_type(), $data ); + } + $this->entities[] = $entity; + } + + // Also emit: + $additional_meta = array( + 'source_path' => $post_tree_node['source_path'], + 'source_type' => $post_tree_node['type'], + 'source_content_converter' => $source_content_converter, + ); + foreach($additional_meta as $key => $value) { + $this->entities[] = new WP_Imported_Entity( + 'post_meta', + array( + 'post_id' => $post_tree_node['post_id'], + 'meta_key' => $key, + 'meta_value' => $value, + ) + ); + } + } + } +} diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php similarity index 95% rename from packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php rename to packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php index 139f737f19..2a04f17f46 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Hierarchy.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php @@ -2,10 +2,10 @@ /** */ -class WP_Filesystem_To_Post_Hierarchy { +class WP_Filesystem_To_Post_Tree { private $file_visitor; - private $current_post; + private $current_node; private $pending_files = array(); private $pending_directory_index; @@ -31,7 +31,7 @@ public static function create( return false; } if ( 1 === $options['first_post_id'] ) { - _doing_it_wrong( __FUNCTION__, 'First post ID must be greater than 1', '1.0.0' ); + _doing_it_wrong( __FUNCTION__, 'First node ID must be greater than 1', '1.0.0' ); return false; } if ( ! 
isset( $options['filter_pattern'] ) ) { @@ -59,12 +59,12 @@ private function __construct( $this->index_file_pattern = $options['index_file_pattern']; } - public function get_current_post() { - return $this->current_post; + public function get_current_node() { + return $this->current_node; } - public function next_post() { - $this->current_post = null; + public function next_node() { + $this->current_node = null; if ( $this->is_finished ) { return false; } @@ -157,7 +157,7 @@ public function next_post() { protected function emit_object( $options ) { $post_id = $this->next_post_id; ++$this->next_post_id; - $this->current_post = array_merge( + $this->current_node = array_merge( $options, array( 'post_id' => $post_id, From 0375bbe00ae1c9b146355a5e769443f2d6e61885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 20 Dec 2024 18:28:12 +0100 Subject: [PATCH 21/26] Rewrite Blocks to Markdown --- .../data-liberation-markdown/plugin.php | 2 +- .../src/WP_Blocks_To_Markdown.php | 384 +++++++++-------- .../src/WP_Markdown_To_Blocks.php | 192 ++++----- .../plugin.php | 92 +++- .../playground/data-liberation/phpunit.xml | 10 +- .../WP_Filesystem_Entity_Reader.php | 14 +- .../WP_Filesystem_To_Post_Tree.php | 20 +- .../src/import/WP_Entity_Importer.php | 5 +- .../src/import/WP_Import_Utils.php | 7 +- .../tests/WPBlocksToMarkdownTests.php | 403 ++++++++++++++++++ .../tests/WPHTMLToBlocksTests.php | 8 +- .../tests/WPMarkdownToBlocksTests.php | 149 +++++++ .../data-liberation/tests/bootstrap.php | 4 + 13 files changed, 973 insertions(+), 317 deletions(-) create mode 100644 packages/playground/data-liberation/tests/WPBlocksToMarkdownTests.php create mode 100644 packages/playground/data-liberation/tests/WPMarkdownToBlocksTests.php create mode 100644 packages/playground/data-liberation/tests/bootstrap.php diff --git a/packages/playground/data-liberation-markdown/plugin.php b/packages/playground/data-liberation-markdown/plugin.php index fdea9ae81e..3fb59bea2b 
100644 --- a/packages/playground/data-liberation-markdown/plugin.php +++ b/packages/playground/data-liberation-markdown/plugin.php @@ -2,4 +2,4 @@ /** * Plugin Name: Data Liberation – Markdown importer */ -require_once __DIR__ . '/src/bootstrap.php'; +require_once __DIR__ . '/src/bootstrap.php'; \ No newline at end of file diff --git a/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php b/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php index 20a7c2c955..1530986235 100644 --- a/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php +++ b/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php @@ -1,198 +1,240 @@ blocks = WP_Block_Markup_Processor::create_fragment($block_markup); - $this->metadata = $metadata; - $this->context_breadcrumbs = $context_breadcrumbs; - } - - public function convert() { - $this->blocks_to_markdown(); - return true; - } - - public function get_result() { - return $this->markdown; - } - - private function blocks_to_markdown() { - if($this->metadata) { - $this->markdown .= "---\n"; - foreach($this->metadata as $key => $value) { - // @TODO: Apply correct YAML value escaping - $value = json_encode($value); - $this->markdown .= "$key: $value\n"; - } - $this->markdown .= "---\n\n"; - } + private $block_markup; + private $state; + private $parents = []; - while($this->blocks->next_token()) { - switch($this->blocks->get_token_type()) { - case '#block-comment': - $this->handle_block_comment(); - break; - case '#tag': - $this->handle_tag(); - break; - case '#text': - $this->markdown .= ltrim(preg_replace('/ +/', ' ', $this->blocks->get_modifiable_text())); - break; - } - } - } + public function __construct($block_markup) { + $this->block_markup = $block_markup; + $this->state = array( + 'indent' => array(), + 'listStyle' => array() + ); + } + + private $markdown; + + public function convert() { + $this->markdown = 
$this->blocks_to_markdown(parse_blocks($this->block_markup)); + } - private function handle_block_comment() { - if ( $this->blocks->is_block_closer() ) { - return; + public function get_result() { + return $this->markdown; + } + + private function blocks_to_markdown($blocks) { + $output = ''; + foreach ($blocks as $block) { + array_push($this->parents, $block['blockName']); + $output .= $this->block_to_markdown($block); + array_pop($this->parents); } - switch($this->blocks->get_block_name()) { - case 'wp:quote': - $markdown = $this->skip_and_convert_inner_html(); - $lines = explode("\n", $markdown); - foreach($lines as $line) { - $this->markdown .= "> $line\n"; + return $output; + } + + private function block_to_markdown($block) { + $block_name = $block['blockName']; + $attributes = $block['attrs'] ?? array(); + $inner_html = $block['innerHTML'] ?? ''; + $inner_blocks = $block['innerBlocks'] ?? array(); + + switch ($block_name) { + case 'core/paragraph': + return $this->html_to_markdown($inner_html) . "\n\n"; + + case 'core/quote': + $content = $this->blocks_to_markdown($inner_blocks); + $lines = explode("\n", $content); + return implode("\n", array_map(function($line) { + return "> $line"; + }, $lines)) . "\n\n"; + + case 'core/code': + $code = $this->html_to_markdown($inner_html); + $language = $attributes['language'] ?? ''; + $fence = str_repeat('`', max(3, $this->longest_sequence_of($code, '`') + 1)); + return "{$fence}{$language}\n{$code}\n{$fence}\n\n"; + + case 'core/image': + return "![" . ($attributes['alt'] ?? '') . "](" . ($attributes['url'] ?? '') . ")\n\n"; + + case 'core/heading': + $level = $attributes['level'] ?? 1; + $content = $this->html_to_markdown($inner_html); + return str_repeat('#', $level) . ' ' . $content . "\n\n"; + + case 'core/list': + array_push($this->state['listStyle'], array( + 'style' => isset($attributes['ordered']) ? ($attributes['type'] ?? 'decimal') : '-', + 'count' => $attributes['start'] ?? 
1 + )); + $list = $this->blocks_to_markdown($inner_blocks); + array_pop($this->state['listStyle']); + if($this->has_parent('core/list-item')){ + return $list; } - $this->markdown .= ">\n"; - break; - case 'wp:list': - $markdown = $this->skip_and_convert_inner_html(); - $lines = explode("\n", $markdown); - foreach($lines as $line) { - if($line) { - $this->markdown .= "* $line\n"; + return $list . "\n"; + + case 'core/list-item': + if (empty($this->state['listStyle'])) { + return ''; + } + + $item = end($this->state['listStyle']); + $bullet = $this->get_list_bullet($item); + $bullet_indent = str_repeat(' ', strlen($bullet) + 1); + + $content = $this->html_to_markdown($inner_html); + $content_parts = explode("\n", $content, 2); + $content_parts = array_map('trim', $content_parts); + $first_line = $content_parts[0]; + $rest_lines = $content_parts[1] ?? ''; + + $item['count']++; + + if (empty($inner_html)) { + $output = implode('', $this->state['indent']) . "$bullet $first_line\n"; + array_push($this->state['indent'], $bullet_indent); + if ($rest_lines) { + $output .= $this->indent($rest_lines, $bullet_indent); } + array_pop($this->state['indent']); + return $output; } - $this->markdown .= "\n"; - break; - case 'wp:list-item': - $this->markdown .= $this->skip_and_convert_inner_html() . "\n"; - break; - case 'wp:group': - // Ignore group blocks and process their inner blocks as if - // the group didn't exist. - break; - case 'wp:code': - $code = $this->skip_and_convert_inner_html(); - $language = $this->blocks->get_block_attribute('language') ?? ''; - $this->markdown .= $this->wrap_in_code_fence($code, $language); - break; - case 'wp:image': - $alt = $this->blocks->get_block_attribute('alt') ?? ''; - $url = $this->blocks->get_block_attribute('url'); - $this->markdown .= "![$alt]($url)\n\n"; - break; - case 'wp:heading': - $level = $this->blocks->get_block_attribute('level') ?? 
1; - $content = $this->skip_and_convert_inner_html(); - $this->markdown .= str_repeat('#', $level) . ' ' . $content . "\n\n"; - break; - case 'wp:paragraph': - $this->markdown .= $this->skip_and_convert_inner_html() . "\n\n"; - break; - case 'wp:separator': - $this->markdown .= "\n---\n\n"; - break; - default: - $code = ''; - if($this->blocks->is_self_closing_block()) { - $code .= ''; + + $markdown = $this->indent("$bullet $first_line\n"); + + array_push($this->state['indent'], $bullet_indent); + if($rest_lines){ + $markdown .= $this->indent($rest_lines) . "\n"; + } + $inner_blocks_markdown = $this->blocks_to_markdown( + $inner_blocks + ); + if($inner_blocks_markdown){ + $markdown .= $inner_blocks_markdown . "\n"; + } + array_pop($this->state['indent']); + + $markdown = rtrim($markdown, "\n"); + if($this->has_parent('core/list-item')){ + $markdown .= "\n"; } else { - $code .= '' . "\n"; - $code .= trim($this->skip_and_convert_inner_html()) . "\n"; - $code .= ''; + $markdown .= "\n\n"; } - $this->markdown .= $this->wrap_in_code_fence($code, 'block'); - break; + + return $markdown; + + case 'core/separator': + return "\n---\n\n"; + + default: + return ''; } } - private function wrap_in_code_fence($code, $language = '') { - $fence = str_repeat('`', max(3, $this->longest_sequence_of($code, '`') + 1)); - return "$fence$language\n$code\n$fence\n\n"; - } + private function html_to_markdown($html, $parents = []) { + $processor = WP_HTML_Processor::create_fragment($html); + $markdown = ''; + + while ($processor->next_token()) { + if ($processor->get_token_type() === '#text') { + $markdown .= $processor->get_modifiable_text(); + continue; + } else if ($processor->get_token_type() !== '#tag') { + continue; + } + + $last_href = null; + $tag_name = $processor->get_tag(); + $sign = $processor->is_tag_closer() ? '-' : ( + $processor->expects_closer() ? '+' : '' + ); + $event = $sign . 
$tag_name; + switch ($event) { + case '+B': + case '-B': + case '+STRONG': + case '-STRONG': + $markdown .= '**'; + break; + + case '+I': + case '-I': + case '+EM': + case '-EM': + $markdown .= '*'; + break; + + case '+CODE': + case '-CODE': + if(!$this->has_parent('core/code')){ + $markdown .= '`'; + } + break; + + case '+A': + $last_href = $processor->get_attribute('href') ?? ''; + $markdown .= '['; + break; - private function handle_tag() { - $prefix = $this->blocks->is_tag_closer() ? '-' : '+'; - $event = $prefix . $this->blocks->get_tag(); - switch($event) { - case '+B': - case '-B': - case '+STRONG': - case '-STRONG': - $this->markdown .= '**'; - break; - case '+I': - case '-I': - case '+EM': - case '-EM': - $this->markdown .= '*'; - break; - case '+U': - case '-U': - $this->markdown .= '_'; - break; - case '+CODE': - case '-CODE': - if(!in_array('wp:code', $this->get_block_breadcrumbs(), true)) { - $this->markdown .= '`'; - } - break; - case '+A': - $href = $this->blocks->get_attribute('href'); - $this->markdown .= '['; - break; - case '-A': - $href = $this->blocks->get_attribute('href'); - $this->markdown .= "]($href)"; - break; - case '+BR': - $this->markdown .= "\n"; - break; - case '+IMG': - $alt = $this->blocks->get_attribute('alt') ?? ''; - $url = $this->blocks->get_attribute('src'); - $this->markdown .= "![$alt]($url)\n\n"; - break; + case '-A': + $markdown .= "]($last_href)"; + break; + + case 'BR': + $markdown .= "\n"; + break; + } } + + $markdown = trim($markdown, "\n "); + $markdown = preg_replace('/ +/', ' ', $markdown); + $markdown = preg_replace('/\n+/', "\n", $markdown); + return $markdown; } - private function skip_and_convert_inner_html() { - // It's important we call get_block_breadcrumbs() before - // calling skip_and_get_block_inner_html() because the - // latter will get to the block closer and pop the block - // we've just entered from the stack. 
- $breadcrumbs_inside_block = $this->get_block_breadcrumbs(); - $html = $this->blocks->skip_and_get_block_inner_html(); - $converter = new WP_Blocks_To_Markdown($html, [], $breadcrumbs_inside_block); - $converter->convert(); - return $converter->get_result(); + private function has_parent($parent) { + return in_array($parent, $this->parents, true); } - private function longest_sequence_of($input, $substring) { - $at = 0; - $sequence_length = 0; - while($at < strlen($input)) { - $at += strcspn($input, $substring, $at); - $current_sequence_length = strspn($input, $substring, $at); - if($current_sequence_length > $sequence_length) { - $sequence_length = $current_sequence_length; - } - $at += $current_sequence_length; + private function get_list_bullet($item) { + if ($item['style'] === '-') { + return '-'; + } + return $item['count'] . '.'; + } + + private function indent($string) { + if (empty($this->state['indent'])) { + return $string; } - return $sequence_length; - } - private function get_block_breadcrumbs() { - return array_merge($this->context_breadcrumbs, $this->blocks->get_block_breadcrumbs()); + $indent = implode('', $this->state['indent']); + $lines = explode("\n", $string); + return implode("\n", array_map(function($line) use ($indent) { + return empty($line) ? $line : $indent . 
$line; + }, $lines)); } + private function longest_sequence_of($input, $substring) { + $longest = 0; + $current = 0; + $len = strlen($input); + + for ($i = 0; $i < $len; $i++) { + if ($input[$i] === $substring) { + $current++; + $longest = max($longest, $current); + } else { + $current = 0; + } + } + + return $longest; + } } diff --git a/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php index f63fb20c52..4f1aabda85 100644 --- a/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php +++ b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php @@ -26,13 +26,11 @@ class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter { const STATE_COMPLETE = 'STATE_COMPLETE'; private $state = self::STATE_READY; - private $root_block; private $block_stack = array(); - private $current_block = null; + private $table_stack = array(); private $frontmatter = array(); private $markdown; - private $parsed_blocks = array(); private $block_markup = ''; public function __construct( $markdown ) { @@ -44,7 +42,7 @@ public function convert() { return false; } $this->convert_markdown_to_blocks(); - $this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks ); + // $this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks ); return true; } @@ -64,10 +62,6 @@ public function get_block_markup() { } private function convert_markdown_to_blocks() { - $this->root_block = $this->create_block( 'post-content' ); - $this->block_stack[] = $this->root_block; - $this->current_block = $this->root_block; - $environment = new Environment( array() ); $environment->addExtension( new CommonMarkCoreExtension() ); $environment->addExtension( new GithubFlavoredMarkdownExtension() ); @@ -81,7 +75,14 @@ private function convert_markdown_to_blocks() { $document = $parser->parse( $this->markdown ); $this->frontmatter = 
array(); - foreach ( $document->data as $key => $value ) { + foreach ( $document->data->export() as $key => $value ) { + if ( 'attributes' === $key && empty( $value ) ) { + // The Frontmatter extension adds an 'attributes' key to the document data + // even when there is no actual "attributes" key in the frontmatter. + // + // Let's skip it when the value is empty. + continue; + } // Use an array as a value to comply with the WP_Block_Markup_Converter interface. $this->frontmatter[ $key ] = array( $value ); } @@ -105,86 +106,72 @@ private function convert_markdown_to_blocks() { 'heading', array( 'level' => $node->getLevel(), - 'content' => 'getLevel() . '>', ) ); + $this->append_content( 'getLevel() . '>' ); break; case ExtensionBlock\ListBlock::class: - $this->push_block( - 'list', - array( - 'ordered' => $node->getListData()->type === 'ordered', - 'content' => '
    ', - ) + $attrs = array( + 'ordered' => $node->getListData()->type === 'ordered', ); if ( $node->getListData()->start && $node->getListData()->start !== 1 ) { - $this->current_block->attrs['start'] = $node->getListData()->start; + $attrs['start'] = $node->getListData()->start; } + $this->push_block( + 'list', + $attrs + ); + $this->append_content( '
      ' ); break; case ExtensionBlock\ListItem::class: - $this->push_block( - 'list-item', - array( - 'content' => '
    • ', - ) - ); + $this->push_block( 'list-item' ); + $this->append_content( '
    • ' ); break; case Table::class: - $this->push_block( - 'table', - array( - 'head' => array(), - 'body' => array(), - 'foot' => array(), - ) - ); + $this->push_block( 'table' ); + $this->append_content( '
      ' ); break; case TableSection::class: - $this->push_block( - 'table-section', - array( - 'type' => $node->isHead() ? 'head' : 'body', - ) - ); + $is_head = $node->isHead(); + array_push( $this->table_stack, $is_head ? 'head' : 'body' ); + $this->append_content( $is_head ? '' : '' ); break; case TableRow::class: - $this->push_block( 'table-row' ); + $this->append_content( '' ); break; case TableCell::class: /** @var TableCell $node */ - $this->push_block( 'table-cell' ); + $is_header = $this->current_block() && $this->current_block()->block_name === 'table' && end( $this->table_stack ) === 'head'; + $tag = $is_header ? 'th' : 'td'; + $this->append_content( '<' . $tag . '>' ); break; case ExtensionBlock\BlockQuote::class: $this->push_block( 'quote' ); + $this->append_content( '
      ' ); break; case ExtensionBlock\FencedCode::class: case ExtensionBlock\IndentedCode::class: - $this->push_block( - 'code', - array( - 'content' => '
      ' . trim( str_replace( "\n", '
      ', htmlspecialchars( $node->getLiteral() ) ) ) . '
      ', - ) + $attrs = array( + 'language' => null, ); if ( method_exists( $node, 'getInfo' ) && $node->getInfo() ) { - $this->current_block->attrs['language'] = preg_replace( '/[ \t\r\n\f].*/', '', $node->getInfo() ); + $attrs['language'] = preg_replace( '/[ \t\r\n\f].*/', '', $node->getInfo() ); } + $this->push_block( 'code', $attrs ); + $this->append_content( '
      ' . trim( str_replace( "\n", '
      ', htmlspecialchars( $node->getLiteral() ) ) ) . '
      ' ); break; case ExtensionBlock\HtmlBlock::class: - $this->push_block( - 'html', - array( - 'content' => $node->getLiteral(), - ) - ); + $this->push_block( 'html' ); + $this->append_content( $node->getLiteral() ); break; case ExtensionBlock\ThematicBreak::class: @@ -192,15 +179,11 @@ private function convert_markdown_to_blocks() { break; case Block\Paragraph::class: - if ( $this->current_block->block_name === 'list-item' ) { + if ( $this->current_block()->block_name === 'list-item' ) { break; } - $this->push_block( - 'paragraph', - array( - 'content' => '

      ', - ) - ); + $this->push_block( 'paragraph' ); + $this->append_content( '

      ' ); break; case Inline\Newline::class: @@ -236,6 +219,15 @@ private function convert_markdown_to_blocks() { if ( $node->getTitle() ) { $html->set_attribute( 'title', $node->getTitle() ); } + + $children = $node->children(); + if ( count( $children ) > 0 && $children[0] instanceof Inline\Text && $children[0]->getLiteral() ) { + $html->set_attribute( 'alt', $children[0]->getLiteral() ); + // Empty the text node so it will not be rendered twice: once in as an alt="", + // and once as a new paragraph block. + $children[0]->setLiteral( '' ); + } + $this->append_content( $html->get_updated_html() ); break; @@ -257,6 +249,10 @@ private function convert_markdown_to_blocks() { } } else { switch ( get_class( $node ) ) { + case ExtensionBlock\BlockQuote::class: + $this->append_content( '

      ' ); + $this->pop_block(); + break; case ExtensionBlock\ListBlock::class: $this->append_content( '' ); $this->pop_block(); @@ -279,53 +275,25 @@ private function convert_markdown_to_blocks() { $this->append_content( '' ); break; case TableSection::class: - $table_section = $this->pop_block(); - $type = $table_section->attrs['type']; - $tag = $type === 'head' ? 'th' : 'td'; - - $parsed_rows = array(); - foreach ( $table_section->inner_blocks as $row ) { - $parsed_row = array(); - foreach ( $row->inner_blocks as $cell ) { - $parsed_row[] = array( - 'tag' => $tag, - 'content' => $cell->attrs['content'] ?? '', - ); - } - $parsed_rows[] = $parsed_row; - } - - $table = $this->current_block; - if ( $type === 'head' ) { - $table->attrs[ $type ] = $parsed_rows[0]; - } else { - $table->attrs[ $type ] = $parsed_rows; - } - $table->inner_blocks = array(); + $is_head = $node->isHead(); + array_pop( $this->table_stack ); + $this->append_content( $is_head ? '' : '' ); + break; + case TableRow::class: + $this->append_content( '' ); + break; + case TableCell::class: + $is_header = $this->current_block() && $this->current_block()->block_name === 'table' && end( $this->table_stack ) === 'head'; + $tag = $is_header ? 'th' : 'td'; + $this->append_content( '' ); break; case Table::class: - $table = '
      '; - $table .= '
      '; - $table .= ''; - foreach ( $this->current_block->attrs['head'] as $cell ) { - $table .= ''; - } - $table .= ''; - foreach ( $this->current_block->attrs['body'] as $row ) { - $table .= ''; - foreach ( $row as $cell ) { - $table .= ''; - } - $table .= ''; - } - $table .= '
      ' . $cell['content'] . '
      ' . $cell['content'] . '
      '; - $table .= '
      '; - $this->current_block->attrs['content'] = $table; + $this->append_content( '' ); $this->pop_block(); break; case Block\Paragraph::class: - if ( $this->current_block->block_name === 'list-item' ) { + if ( $this->current_block()->block_name === 'list-item' ) { break; } $this->append_content( '

      ' ); @@ -346,36 +314,30 @@ private function convert_markdown_to_blocks() { } } } - $this->parsed_blocks = $this->root_block->inner_blocks; } private function append_content( $content ) { - if ( ! isset( $this->current_block->attrs['content'] ) ) { - $this->current_block->attrs['content'] = ''; - } - $this->current_block->attrs['content'] .= $content; + $this->block_markup .= $content; } - private function push_block( $name, $attributes = array(), $inner_blocks = array() ) { - $block = $this->create_block( $name, $attributes, $inner_blocks ); - $this->current_block->inner_blocks[] = $block; - array_push( $this->block_stack, $block ); - $this->current_block = $block; - } - - private function create_block( $name, $attributes = array(), $inner_blocks = array() ) { - return new WP_Block_Object( + private function push_block( $name, $attributes = array() ) { + $block = new WP_Block_Object( $name, $attributes, - $inner_blocks ); + array_push( $this->block_stack, $block ); + $this->block_markup .= WP_Import_Utils::block_opener( $block->block_name, $block->attrs ) . "\n"; } private function pop_block() { if ( ! empty( $this->block_stack ) ) { $popped = array_pop( $this->block_stack ); - $this->current_block = end( $this->block_stack ); + $this->block_markup .= WP_Import_Utils::block_closer( $popped->block_name ) . 
"\n"; return $popped; } } + + private function current_block() { + return end( $this->block_stack ); + } } diff --git a/packages/playground/data-liberation-static-files-editor/plugin.php b/packages/playground/data-liberation-static-files-editor/plugin.php index 41380dce78..cf869b7eb6 100644 --- a/packages/playground/data-liberation-static-files-editor/plugin.php +++ b/packages/playground/data-liberation-static-files-editor/plugin.php @@ -22,6 +22,94 @@ class WP_Static_Files_Editor_Plugin { static public function register_hooks() { register_activation_hook( __FILE__, array(self::class, 'import_static_pages') ); add_action('save_post', array(self::class, 'on_save_post')); + add_action('init', function() { + $converter = new WP_Blocks_To_Markdown(<< +

      WordPress 6.8 was released

      + + + +

      Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library.

      + + + +
        + +
      • Major Features + +
          + +
        • Block Editor Updates + +
            + +
          • New block patterns added
          • + + + +
          • Improved performance + + Hey Hey + + More lines + +
              + +
            • New block + Hey Hey + + More lines patterns added
            • + + +
            • Improved performance
            • + +
            + +
          • + + +
          • Improved performance
          • + +
          + +
        • + + +
        • Improved performance
        • + + +
        • Improved performance
        • + +
        + +
      • + +
      + + + +
      + +
      function hello() {
      +    console.log("Hello world!");
      +}
      + +
      + + + +
      + + +
      Header 1Header 2
      Cell 1Cell 2
      Cell 3Cell 4
      + + +HTML); + echo ''; + $converter->convert(); + var_dump($converter->get_result()); + die(); + }); } /** @@ -99,7 +187,7 @@ static public function on_save_post($post_id) { } // self::deltree(WP_STATIC_CONTENT_DIR); - mkdir(WP_STATIC_CONTENT_DIR); + mkdir(WP_STATIC_CONTENT_DIR, 0777, true); self::save_db_pages_as_html(WP_STATIC_CONTENT_DIR); } @@ -161,7 +249,7 @@ static private function save_db_pages_as_html($path, $parent_id = null) { mkdir($path, 0777, true); } - $source_path_relative = get_post_meta($page_id, 'source_path', true); + $source_path_relative = get_post_meta($page_id, 'source_path_relative', true); if(empty($source_path_relative)) { $title = sanitize_title(get_the_title()); $source_path_relative = $page->menu_order . '_' . $title . '.' . $content_converter; diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index c99ffcf858..8b656b80fb 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -1,10 +1,18 @@ <?xml version="1.0"?> -<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" bootstrap="bootstrap.php" colors="true" xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd" cacheDirectory=".phpunit.cache"> +<phpunit + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + bootstrap="tests/bootstrap.php" + colors="true" + xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd" + cacheDirectory=".phpunit.cache" +> <testsuites> <testsuite name="Application Test Suite"> <file>tests/WPWXRReaderTests.php</file> <file>tests/WPRewriteUrlsTests.php</file> <file>tests/WPHTMLToBlocksTests.php</file> + <file>tests/WPMarkdownToBlocksTests.php</file> + <file>tests/WPBlocksToMarkdownTests.php</file> <file>tests/WPHTMLEntityReaderTests.php</file> <file>tests/WPEPubEntityReaderTests.php</file> <file>tests/WPDirectoryTreeEntityReaderTests.php</file> diff --git 
a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php index 1ca0a0947f..5837bbdc7b 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php @@ -49,8 +49,8 @@ public function next_entity(): bool { $source_content_converter = null; $post_tree_node = $this->post_tree->get_current_node(); if($post_tree_node['type'] === 'file') { - $content = $this->filesystem->read_file($post_tree_node['source_path']); - $extension = pathinfo($post_tree_node['source_path'], PATHINFO_EXTENSION); + $content = $this->filesystem->read_file($post_tree_node['source_path_absolute']); + $extension = pathinfo($post_tree_node['source_path_absolute'], PATHINFO_EXTENSION); switch($extension) { case 'md': $converter = new WP_Markdown_To_Blocks( $content ); @@ -89,12 +89,12 @@ public function next_entity(): bool { $data = $entity->get_data(); if( $entity->get_type() === 'post' ) { $data['id'] = $post_tree_node['post_id']; - $data['guid'] = $post_tree_node['source_path']; + $data['guid'] = $post_tree_node['source_path_relative']; $data['post_parent'] = $post_tree_node['parent_id']; $data['post_title'] = $data['post_title'] ?? null; $data['post_type'] = 'page'; if ( ! 
$data['post_title'] ) { - $data['post_title'] = WP_Import_Utils::slug_to_title( basename( $post_tree_node['source_path'] ) ); + $data['post_title'] = WP_Import_Utils::slug_to_title( basename( $post_tree_node['source_path_relative'] ) ); } $entity = new WP_Imported_Entity( $entity->get_type(), $data ); } @@ -103,7 +103,7 @@ public function next_entity(): bool { // Also emit: $additional_meta = array( - 'source_path' => $post_tree_node['source_path'], + 'source_path_relative' => $post_tree_node['source_path_relative'], 'source_type' => $post_tree_node['type'], 'source_content_converter' => $source_content_converter, ); @@ -112,8 +112,8 @@ public function next_entity(): bool { 'post_meta', array( 'post_id' => $post_tree_node['post_id'], - 'meta_key' => $key, - 'meta_value' => $value, + 'key' => $key, + 'value' => $value, ) ); } diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php index 2a04f17f46..a66cee982d 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php @@ -12,6 +12,7 @@ class WP_Filesystem_To_Post_Tree { private $parent_ids = array(); private $next_post_id; + private $root_dir; private $create_index_pages; private $entities_read_so_far = 0; private $filter_pattern = '##'; @@ -42,17 +43,15 @@ public static function create( _doing_it_wrong( __FUNCTION__, 'Missing required options: index_file_pattern', '1.0.0' ); return false; } - return new self( - new WordPress\Filesystem\WP_Filesystem_Visitor( $filesystem, $options['root_dir'] ), - $options - ); + return new self( $filesystem, $options ); } private function __construct( - \WordPress\Filesystem\WP_Filesystem_Visitor $file_visitor, + \WordPress\Filesystem\WP_Abstract_Filesystem $filesystem, $options ) { - $this->file_visitor = $file_visitor; + 
$this->file_visitor = new WordPress\Filesystem\WP_Filesystem_Visitor( $filesystem, $options['root_dir'] ); + $this->root_dir = $options['root_dir']; $this->create_index_pages = $options['create_index_pages'] ?? true; $this->next_post_id = $options['first_post_id']; $this->filter_pattern = $options['filter_pattern']; @@ -102,7 +101,7 @@ public function next_node() { $this->parent_ids[ $missing_parent_id_depth ] = $this->emit_object( array( 'type' => 'directory', - 'source_path' => $missing_parent_path, + 'source_path_absolute' => $missing_parent_path, 'parent_id' => $this->parent_ids[ $missing_parent_id_depth - 1 ] ?? null, ) ); @@ -112,7 +111,7 @@ public function next_node() { $this->parent_ids[ $depth ] = $this->emit_object( array( 'type' => 'file_placeholder', - 'source_path' => $dir, + 'source_path_absolute' => $dir, 'parent_id' => $parent_id, ) ); @@ -123,7 +122,7 @@ public function next_node() { $this->parent_ids[ $depth ] = $this->emit_object( array( 'type' => 'file', - 'source_path' => $file_path, + 'source_path_absolute' => $file_path, 'parent_id' => $parent_id, ) ); @@ -139,7 +138,7 @@ public function next_node() { $this->emit_object( array( 'type' => 'file', - 'source_path' => $file_path, + 'source_path_absolute' => $file_path, 'parent_id' => $parent_id, ) ); @@ -161,6 +160,7 @@ protected function emit_object( $options ) { $options, array( 'post_id' => $post_id, + 'source_path_relative' => substr( $options['source_path_absolute'], strlen( $this->root_dir ) ), ) ); ++$this->entities_read_so_far; diff --git a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php index 95ff593f6f..ec5a5a6d89 100644 --- a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php @@ -865,7 +865,7 @@ public function import_attachment( $filepath, $post_id ) { * @return int|WP_Error Number of meta items 
imported on success, error otherwise. */ public function import_post_meta( $meta_item, $post_id ) { - if ( empty( $meta ) ) { + if ( empty( $meta_item ) ) { return true; } @@ -880,7 +880,7 @@ public function import_post_meta( $meta_item, $post_id ) { return false; } - $key = apply_filters( 'import_post_meta_key', $meta_item['key'], $post_id, $post ); + $key = apply_filters( 'import_post_meta_key', $meta_item['key'], $post_id ); $value = false; if ( '_edit_last' === $key ) { @@ -894,6 +894,7 @@ public function import_post_meta( $meta_item, $post_id ) { $value = $this->mapping['user'][ $value ]; } + if ( $key ) { // export gets meta straight from the DB so could have a serialized string if ( ! $value ) { diff --git a/packages/playground/data-liberation/src/import/WP_Import_Utils.php b/packages/playground/data-liberation/src/import/WP_Import_Utils.php index 77e29a2870..03e50f54a8 100644 --- a/packages/playground/data-liberation/src/import/WP_Import_Utils.php +++ b/packages/playground/data-liberation/src/import/WP_Import_Utils.php @@ -32,8 +32,11 @@ public static function convert_blocks_to_markup( $blocks ) { continue; } // Start of block comment - $block_markup .= self::block_opener( $block->block_name, $block->attrs ); - $block_markup .= $block->attrs['content'] ?? ''; + $attrs_without_content = $block->attrs; + $content = $block->attrs['content'] ?? 
''; + unset( $attrs_without_content['content'] ); + $block_markup .= self::block_opener( $block->block_name, $attrs_without_content ); + $block_markup .= $content; $block_markup .= self::convert_blocks_to_markup( $block->inner_blocks ); $block_markup .= self::block_closer( $block->block_name ); } diff --git a/packages/playground/data-liberation/tests/WPBlocksToMarkdownTests.php b/packages/playground/data-liberation/tests/WPBlocksToMarkdownTests.php new file mode 100644 index 0000000000..9760fbb0b1 --- /dev/null +++ b/packages/playground/data-liberation/tests/WPBlocksToMarkdownTests.php @@ -0,0 +1,403 @@ +<?php + +use PHPUnit\Framework\TestCase; + +class WPBlocksToMarkdownTests extends TestCase { + + public function test_markdown_ast_conversion() { + $blocks = <<<HTML +<!-- wp:heading {"level":1} --> +<h1>WordPress 6.8 was released</h1> +<!-- /wp:heading --> + +<!-- wp:paragraph --> +<p>Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. 
It also includes a new block editor experience, and a new block library.</p> +<!-- /wp:paragraph --> + +<!-- wp:table --> +<figure class="wp-block-table"><table><thead><tr><th>Feature</th><th>Status</th></tr></thead><tbody><tr><td>Block Editor</td><td>Released</td></tr><tr><td>New Theme</td><td>Released</td></tr></tbody></table></figure> +<!-- /wp:table --> + +<!-- wp:list --> +<ul> + <!-- wp:list-item --> + <li>Major Features + <!-- wp:list --> + <ul> + <!-- wp:list-item --> + <li>Block Editor Updates + <!-- wp:list --> + <ul> + <!-- wp:list-item --> + <li>New <code>block patterns</code> added</li> + <!-- /wp:list-item --> + <!-- wp:list-item --> + <li> + Improved performance + + <!-- wp:list --> + <ul> + <!-- wp:list-item --> + <li>New <code>block patterns</code> added</li> + <!-- /wp:list-item --> + <!-- wp:list-item --> + <li>Improved performance</li> + <!-- /wp:list-item --> + </ul> + <!-- /wp:list --> + </li> + <!-- /wp:list-item --> + </ul> + <!-- /wp:list --> + </li> + <!-- /wp:list-item --> + </ul> + <!-- /wp:list --> + </li> + <!-- /wp:list-item --> +</ul> +<!-- /wp:list --> + +<!-- wp:code --> +<pre class="wp-block-code"><code>function example() { + return "WordPress 6.8"; +}</code></pre> +<!-- /wp:code --> + +<!-- wp:paragraph --> +<p>The <b>most significant</b> update includes <em>improved</em> block editing capabilities.</p> +<!-- /wp:paragraph --> + +HTML; + $expected = [ + [ + 'type' => 'heading', + 'level' => 1, + 'content' => [ + [ + 'type' => 'text', + 'content' => 'WordPress 6.8 was released', + ], + ], + ], + [ + 'type' => 'paragraph', + 'content' => [ + [ + 'type' => 'text', + 'content' => 'Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. 
It also includes a new block editor experience, and a new block library.', + ], + ], + ], + [ + 'type' => 'html_block', + 'content' => ' +<figure class="wp-block-table"><table><thead><tr><th>Feature</th><th>Status</th></tr></thead><tbody><tr><td>Block Editor</td><td>Released</td></tr><tr><td>New Theme</td><td>Released</td></tr></tbody></table></figure> +', + ], + [ + 'type' => 'list', + 'content' => [ + [ + 'type' => 'list_item', + 'depth' => 0, + 'content' => [ + [ + 'type' => 'text', + 'content' => 'Major Features', + ], + [ + 'type' => 'list', + 'content' => [ + [ + 'type' => 'list_item', + 'depth' => 0, + 'content' => [ + [ + 'type' => 'text', + 'content' => 'Block Editor Updates', + ], + [ + 'type' => 'list', + 'content' => [ + [ + 'type' => 'list_item', + 'depth' => 0, + 'content' => [ + [ + 'type' => 'text', + 'content' => 'New', + ], + [ + 'type' => 'code', + ], + [ + 'type' => 'text', + 'content' => 'block patterns', + ], + [ + 'type' => 'code', + ], + [ + 'type' => 'text', + 'content' => 'added', + ], + ], + ], + [ + 'type' => 'list_item', + 'depth' => 0, + 'content' => [ + [ + 'type' => 'text', + 'content' => 'Improved performance', + ], + ], + ], + ], + ], + ], + ], + ], + ], + ], + ], + ], + ], + [ + 'type' => 'code_block', + 'language' => false, + 'content' => [ + [ + 'type' => 'text', + 'content' => 'function example() { + return "WordPress 6.8"; +}', + ], + ], + ], + [ + 'type' => 'paragraph', + 'content' => [ + [ + 'type' => 'text', + 'content' => 'The', + ], + [ + 'type' => 'strong', + ], + [ + 'type' => 'text', + 'content' => 'most significant', + ], + [ + 'type' => 'strong', + ], + [ + 'type' => 'text', + 'content' => 'update includes', + ], + [ + 'type' => 'emphasis', + ], + [ + 'type' => 'text', + 'content' => 'improved', + ], + [ + 'type' => 'emphasis', + ], + [ + 'type' => 'text', + 'content' => 'block editing capabilities.', + ], + ], + ], + ]; + + $converter = new WP_Blocks_To_Markdown($blocks); + $converter->convert(); + $markdown_ast = 
$converter->get_markdown_ast(); + + $this->assertEquals($expected, $markdown_ast); + } + + public function test_metadata_preservation() { + $metadata = [ + 'post_title' => 'WordPress 6.8 was released', + 'post_date' => '2024-12-16', + 'post_modified' => '2024-12-16', + 'post_author' => '1', + 'post_author_name' => 'The WordPress Team', + 'post_author_url' => 'https://wordpress.org', + 'post_author_avatar' => 'https://wordpress.org/wp-content/uploads/2024/04/wordpress-logo-2024.png' + ]; + + $blocks = <<<HTML +<!-- wp:heading {"level":1} --> +<h1>WordPress 6.8 was released</h1> +<!-- /wp:heading --> + +<!-- wp:paragraph --> +<p>Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library.</p> +<!-- /wp:paragraph --> +HTML; + + $converter = new WP_Blocks_To_Markdown($blocks, $metadata); + $this->assertTrue($converter->convert()); + $markdown = $converter->get_result(); + + $expected = <<<MD +--- +post_title: "WordPress 6.8 was released" +post_date: "2024-12-16" +post_modified: "2024-12-16" +post_author: "1" +post_author_name: "The WordPress Team" +post_author_url: "https:\/\/wordpress.org" +post_author_avatar: "https:\/\/wordpress.org\/wp-content\/uploads\/2024\/04\/wordpress-logo-2024.png" +--- + +# WordPress 6.8 was released + +Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library. 
+MD; + $this->assertEquals( + trim($expected, " \n"), + trim($markdown, " \n") + ); + } + + /** + * @dataProvider provider_test_conversion + */ + public function test_blocks_to_markdown_conversion($blocks, $expected) { + $converter = new WP_Blocks_To_Markdown($blocks); + $converter->convert(); + $markdown = $converter->get_result(); + + $this->assertEquals($expected, $markdown); + } + + public function provider_test_conversion() { + return [ + 'A simple paragraph' => [ + 'blocks' => '<!-- wp:paragraph --><p>A simple paragraph</p><!-- /wp:paragraph -->', + 'expected' => "A simple paragraph\n\n" + ], + 'A simple list' => [ + 'blocks' => <<<HTML +<!-- wp:list {"ordered":false} --> +<ul class="wp-block-list"> +<!-- wp:list-item --><li>Item 1</li><!-- /wp:list-item --> +<!-- wp:list-item --><li>Item 2</li><!-- /wp:list-item --> +</ul> +<!-- /wp:list --> +HTML, + 'expected' => "* Item 1\n* Item 2\n\n" + ], + 'A nested list' => [ + 'blocks' => <<<HTML +<!-- wp:list {"ordered":false} --> +<ul class="wp-block-list"> +<!-- wp:list-item --><li>Item 1 +<!-- wp:list {"ordered":false} --> +<ul class="wp-block-list"> +<!-- wp:list-item --><li>Item 1.1</li><!-- /wp:list-item --> +<!-- wp:list-item --><li>Item 1.2</li><!-- /wp:list-item --> +</ul> +<!-- /wp:list --> +</li><!-- /wp:list-item --> +<!-- wp:list-item --><li>Item 2</li><!-- /wp:list-item --> +</ul> +<!-- /wp:list --> +HTML, + 'expected' => "* Item 1\n * Item 1.1\n * Item 1.2\n* Item 2\n\n" + ], + 'An image' => [ + 'blocks' => '<!-- wp:image {"url":"https://w.org/logo.png","alt":"An image"} -->', + 'expected' => "![An image](https://w.org/logo.png)\n\n" + ], + 'A heading' => [ + 'blocks' => '<!-- wp:heading {"level":4} --><h4>A simple heading</h4><!-- /wp:heading -->', + 'expected' => "#### A simple heading\n\n" + ], + 'A link inside a paragraph' => [ + 'blocks' => '<!-- wp:paragraph --><p>A simple paragraph with a <a href="https://wordpress.org">link</a></p><!-- /wp:paragraph -->', + 'expected' => "A simple paragraph 
with a [link](https://wordpress.org)\n\n" + ], + 'Formatted text' => [ + 'blocks' => '<!-- wp:paragraph --><p><b>Bold</b> and <em>Italic</em></p><!-- /wp:paragraph -->', + 'expected' => "**Bold** and *Italic*\n\n" + ], + 'A blockquote' => [ + 'blocks' => '<!-- wp:quote --><blockquote class="wp-block-quote"><!-- wp:paragraph --><p>A simple blockquote</p><!-- /wp:paragraph --></blockquote><!-- /wp:quote -->', + 'expected' => "> A simple blockquote\n> \n" + ], + 'A table' => [ + 'blocks' => <<<HTML +<!-- wp:table --> +<figure class="wp-block-table"><table class="has-fixed-layout"> +<thead><tr><th>Header 1</th><th>Header 2</th></tr></thead> +<tbody><tr><td>Cell 1</td><td>Cell 2</td></tr><tr><td>Cell 3</td><td>Cell 4</td></tr></tbody> +</table></figure> +<!-- /wp:table --> +HTML, + 'expected' => <<<MD +| Header 1 | Header 2 | +|----------|----------| +| Cell 1 | Cell 2 | +| Cell 3 | Cell 4 | + +MD + ], + ]; + } + + public function test_blocks_to_markdown_excerpt() { + $input = file_get_contents(__DIR__ . '/fixtures/blocks-to-markdown/excerpt.input.html'); + $converter = new WP_Blocks_To_Markdown($input); + $converter->convert(); + $markdown = $converter->get_result(); + + $output_file = __DIR__ . 
'/fixtures/blocks-to-markdown/excerpt.output.md'; + if (getenv('UPDATE_FIXTURES')) { + file_put_contents($output_file, $markdown); + } + + $this->assertEquals(file_get_contents($output_file), $markdown); + } + + public function test_metadata_preservation_with_frontmatter() { + $blocks = <<<HTML +<!-- wp:heading {"level":1} --> +<h1>Brian Chesky – Founder Mode & The Art of Hiring</h1> +<!-- /wp:heading --> + +<!-- wp:paragraph --> +<p>Here are the key insights...</p> +<!-- /wp:paragraph --> +HTML; + + $metadata = [ + 'title' => 'Brian Chesky – Founder Mode & The Art of Hiring' + ]; + + $converter = new WP_Blocks_To_Markdown($blocks, $metadata); + $converter->convert(); + $markdown = $converter->get_result(); + + $expected = <<<MD +--- +title: "Brian Chesky \u2013 Founder Mode & The Art of Hiring" +--- + +# Brian Chesky – Founder Mode & The Art of Hiring + +Here are the key insights... + + +MD; + $this->assertEquals($expected, $markdown); + } +} diff --git a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php index 91359b9e47..83f03e8ed7 100644 --- a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php +++ b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php @@ -35,7 +35,7 @@ public function test_metadata_extraction() { * @dataProvider provider_test_conversion */ public function test_html_to_blocks_conversion( $html, $expected ) { - $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $html ) ); + $converter = new WP_HTML_To_Blocks( WP_HTML_Processor::create_fragment( $html ) ); $converter->convert( $html ); $blocks = $converter->get_block_markup(); @@ -43,15 +43,11 @@ public function test_html_to_blocks_conversion( $html, $expected ) { } private function normalize_markup( $markup ) { - $processor = new WP_HTML_Processor( $markup ); + $processor = WP_HTML_Processor::create_fragment( $markup ); $serialized = $processor->serialize(); $serialized = trim( 
str_replace( [ - // Naively remove parts of the HTML that serialize() - // adds that we don't want. - '<html><head></head><body>', - '</body></html>', // Even more naively, remove all the newlines. "\n" ], diff --git a/packages/playground/data-liberation/tests/WPMarkdownToBlocksTests.php b/packages/playground/data-liberation/tests/WPMarkdownToBlocksTests.php new file mode 100644 index 0000000000..852c4f9d2c --- /dev/null +++ b/packages/playground/data-liberation/tests/WPMarkdownToBlocksTests.php @@ -0,0 +1,149 @@ +<?php + +use PHPUnit\Framework\TestCase; + +class WPMarkdownToBlocksTests extends TestCase { + + public function test_metadata_extraction() { + $markdown = <<<MD +--- +post_title: "WordPress 6.8 was released" +post_date: "2024-12-16" +post_modified: "2024-12-16" +post_author: "1" +post_author_name: "The WordPress Team" +post_author_url: "https://wordpress.org" +post_author_avatar: "https://wordpress.org/wp-content/uploads/2024/04/wordpress-logo-2024.png" +--- + +# WordPress 6.8 was released + +Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library. 
+MD; + $converter = new WP_Markdown_To_Blocks($markdown); + $this->assertTrue($converter->convert()); + $metadata = $converter->get_all_metadata(); + $expected_metadata = [ + 'post_title' => ['WordPress 6.8 was released'], + 'post_date' => ['2024-12-16'], + 'post_modified' => ['2024-12-16'], + 'post_author' => ['1'], + 'post_author_name' => ['The WordPress Team'], + 'post_author_url' => ['https://wordpress.org'], + 'post_author_avatar' => ['https://wordpress.org/wp-content/uploads/2024/04/wordpress-logo-2024.png'], + ]; + $this->assertEquals($expected_metadata, $metadata); + } + + /** + * @dataProvider provider_test_conversion + */ + public function test_markdown_to_blocks_conversion($markdown, $expected) { + $converter = new WP_Markdown_To_Blocks($markdown); + $converter->convert(); + $blocks = $converter->get_block_markup(); + + $this->assertEquals($this->normalize_markup($expected), $this->normalize_markup($blocks)); + } + + private function normalize_markup($markup) { + $processor = WP_HTML_Processor::create_fragment($markup); + $serialized = $processor->serialize(); + $serialized = trim( + str_replace( + [ + // Even more naively, remove all the newlines. 
+ "\n" + ], + '', + $serialized + ) + ); + return $serialized; + } + + public function provider_test_conversion() { + return [ + 'A simple paragraph' => [ + 'markdown' => 'A simple paragraph', + 'expected' => "<!-- wp:paragraph --><p>A simple paragraph</p><!-- /wp:paragraph -->" + ], + 'A simple list' => [ + 'markdown' => "- Item 1\n- Item 2", + 'expected' => <<<HTML +<!-- wp:list {"ordered":false} --><ul class="wp-block-list"><!-- wp:list-item --><li>Item 1</li><!-- /wp:list-item --><!-- wp:list-item --><li>Item 2</li><!-- /wp:list-item --></ul><!-- /wp:list --> +HTML + ], + 'A nested list' => [ + 'markdown' => "- Item 1\n - Item 1.1\n - Item 1.2\n- Item 2", + 'expected' => <<<HTML +<!-- wp:list {"ordered":false} --><ul class="wp-block-list"><!-- wp:list-item --><li>Item 1<!-- wp:list {"ordered":false} --><ul class="wp-block-list"><!-- wp:list-item --><li>Item 1.1</li><!-- /wp:list-item --><!-- wp:list-item --><li>Item 1.2</li><!-- /wp:list-item --></ul><!-- /wp:list --></li><!-- /wp:list-item --><!-- wp:list-item --><li>Item 2</li><!-- /wp:list-item --></ul><!-- /wp:list --> +HTML + ], + 'An image' => [ + 'markdown' => '![An image](https://w.org/logo.png)', + 'expected' => "<!-- wp:paragraph --><p><img alt=\"An image\" src=\"https://w.org/logo.png\"></p><!-- /wp:paragraph -->" + ], + 'A heading' => [ + 'markdown' => '#### A simple heading', + 'expected' => "<!-- wp:heading {\"level\":4} --><h4>A simple heading</h4><!-- /wp:heading -->" + ], + 'A link inside a paragraph' => [ + 'markdown' => 'A simple paragraph with a [link](https://wordpress.org)', + 'expected' => "<!-- wp:paragraph --><p>A simple paragraph with a <a href=\"https://wordpress.org\">link</a></p><!-- /wp:paragraph -->" + ], + 'Formatted text' => [ + 'markdown' => '**Bold** and *Italic*', + 'expected' => "<!-- wp:paragraph --><p><b>Bold</b> and <em>Italic</em></p><!-- /wp:paragraph -->" + ], + 'A blockquote' => [ + 'markdown' => '> A simple blockquote', + 'expected' => "<!-- wp:quote --><blockquote 
class=\"wp-block-quote\"><!-- wp:paragraph --><p>A simple blockquote</p><!-- /wp:paragraph --></blockquote><!-- /wp:quote -->" + ], + 'A table' => [ + 'markdown' => <<<MD +| Header 1 | Header 2 | +|----------|----------| +| Cell 1 | Cell 2 | +| Cell 3 | Cell 4 | +MD, + 'expected' => <<<HTML +<!-- wp:table --><figure class="wp-block-table"><table class="has-fixed-layout"><thead><tr><th>Header 1</th><th>Header 2</th></tr></thead><tbody><tr><td>Cell 1</td><td>Cell 2</td></tr><tr><td>Cell 3</td><td>Cell 4</td></tr></tbody></table></figure><!-- /wp:table --> +HTML + ], + ]; + } + + public function test_markdown_to_blocks_excerpt() { + $input = file_get_contents(__DIR__ . '/fixtures/markdown-to-blocks/excerpt.input.md'); + $converter = new WP_Markdown_To_Blocks($input); + $converter->convert(); + $blocks = $converter->get_block_markup(); + + $output_file = __DIR__ . '/fixtures/markdown-to-blocks/excerpt.output.html'; + if (getenv('UPDATE_FIXTURES')) { + file_put_contents($output_file, $blocks); + } + + $this->assertEquals(file_get_contents($output_file), $blocks); + } + + public function test_frontmatter_extraction() { + $markdown = <<<MD +--- +title: "Brian Chesky – Founder Mode & The Art of Hiring" +--- + +# Brian Chesky – Founder Mode & The Art of Hiring + +Here are the key insights... +MD; + $converter = new WP_Markdown_To_Blocks($markdown); + $converter->convert(); + $metadata = $converter->get_all_metadata(); + $expected_metadata = [ + 'title' => ['Brian Chesky – Founder Mode & The Art of Hiring'] + ]; + $this->assertEquals($expected_metadata, $metadata); + } +} diff --git a/packages/playground/data-liberation/tests/bootstrap.php b/packages/playground/data-liberation/tests/bootstrap.php new file mode 100644 index 0000000000..534576d047 --- /dev/null +++ b/packages/playground/data-liberation/tests/bootstrap.php @@ -0,0 +1,4 @@ +<?php + +require_once __DIR__ . '/../bootstrap.php'; +require_once __DIR__ . 
'/../../data-liberation-markdown/src/bootstrap.php'; \ No newline at end of file From f9eb988cf22d0613a36b5279bdf5042319e578dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com> Date: Fri, 20 Dec 2024 19:30:41 +0100 Subject: [PATCH 22/26] Two way Blocks <-> Markdown conversion pipeline --- .../src/WP_Blocks_To_Markdown.php | 79 +++++++++++++++- .../src/WP_Markdown_To_Blocks.php | 3 +- .../blueprint.json | 4 - .../plugin.php | 92 +------------------ .../playground/data-liberation/bootstrap.php | 1 + .../src/WP_Data_Liberation_HTML_Processor.php | 54 +++++++++++ .../WP_Block_Markup_Entity_Reader.php | 4 +- .../entity-readers/WP_HTML_Entity_Reader.php | 4 +- 8 files changed, 142 insertions(+), 99 deletions(-) create mode 100644 packages/playground/data-liberation/src/WP_Data_Liberation_HTML_Processor.php diff --git a/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php b/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php index 1530986235..05ae218810 100644 --- a/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php +++ b/packages/playground/data-liberation-markdown/src/WP_Blocks_To_Markdown.php @@ -67,6 +67,75 @@ private function block_to_markdown($block) { $content = $this->html_to_markdown($inner_html); return str_repeat('#', $level) . ' ' . $content . "\n\n"; + case 'core/table': + // Accumulate all the table contents to compute the markdown + // column widths. 
+ $processor = WP_Data_Liberation_HTML_Processor::create_fragment($inner_html); + $rows = []; + $header = []; + $in_header = false; + $current_row = []; + + while ($processor->next_token()) { + if ($processor->get_token_type() !== '#tag') { + continue; + } + + $tag = $processor->get_tag(); + $is_closer = $processor->is_tag_closer(); + + if ($tag === 'THEAD' && !$is_closer) { + $in_header = true; + } else if ($tag === 'THEAD' && $is_closer) { + $in_header = false; + } else if ($tag === 'TR' && $is_closer) { + if ($in_header) { + $header = $current_row; + } else { + $rows[] = $current_row; + } + $current_row = []; + } else if (($tag === 'TH' || $tag === 'TD') && !$is_closer) { + $cell_content = $processor->get_inner_html(); + $current_row[] = $this->html_to_markdown($cell_content); + $processor->skip_to_closer(); + } + } + + if (empty($header) && !empty($rows)) { + $header = array_shift($rows); + } + + if (empty($header)) { + return ''; + } + + $col_widths = array_map('strlen', $header); + foreach ($rows as $row) { + foreach ($row as $i => $cell) { + $col_widths[$i] = max($col_widths[$i], strlen($cell)); + } + } + + $padded_header = array_map(function($cell, $width) { + return str_pad($cell, $width); + }, $header, $col_widths); + $markdown = "| " . implode(" | ", $padded_header) . " |\n"; + + $separator_cells = array_map(function($width) { + return str_repeat("-", $width + 2); + }, $col_widths); + $markdown .= "|" . implode("|", $separator_cells) . "|\n"; + + foreach ($rows as $row) { + $padded_cells = array_map(function($cell, $width) { + return str_pad($cell, $width); + }, $row, $col_widths); + $markdown .= "| " . implode(" | ", $padded_cells) . " |\n"; + } + + return $markdown . "\n"; + case 'core/list': array_push($this->state['listStyle'], array( 'style' => isset($attributes['ordered']) ? ($attributes['type'] ?? 
'decimal') : '-', @@ -138,7 +207,7 @@ private function block_to_markdown($block) { } private function html_to_markdown($html, $parents = []) { - $processor = WP_HTML_Processor::create_fragment($html); + $processor = WP_Data_Liberation_HTML_Processor::create_fragment($html); $markdown = ''; while ($processor->next_token()) { @@ -170,6 +239,11 @@ private function html_to_markdown($html, $parents = []) { $markdown .= '*'; break; + case '+DEL': + case '-DEL': + $markdown .= '~~'; + break; + case '+CODE': case '-CODE': if(!$this->has_parent('core/code')){ @@ -192,6 +266,9 @@ private function html_to_markdown($html, $parents = []) { } } + // The HTML processor gives us all the whitespace verbatim + // as it was encountered in the byte stream. + // Let's normalize it to a single space. $markdown = trim($markdown, "\n "); $markdown = preg_replace('/ +/', ' ', $markdown); $markdown = preg_replace('/\n+/', "\n", $markdown); diff --git a/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php index 4f1aabda85..94da3e484c 100644 --- a/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php +++ b/packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php @@ -179,7 +179,8 @@ private function convert_markdown_to_blocks() { break; case Block\Paragraph::class: - if ( $this->current_block()->block_name === 'list-item' ) { + $current_block = $this->current_block(); + if ( $current_block && $current_block->block_name === 'list-item' ) { break; } $this->push_block( 'paragraph' ); diff --git a/packages/playground/data-liberation-static-files-editor/blueprint.json b/packages/playground/data-liberation-static-files-editor/blueprint.json index 0e3b539bdf..76dcfc3a79 100644 --- a/packages/playground/data-liberation-static-files-editor/blueprint.json +++ b/packages/playground/data-liberation-static-files-editor/blueprint.json @@ -14,10 +14,6 @@ { "step": 
"activatePlugin", "pluginPath": "z-data-liberation-markdown/plugin.php" - }, - { - "step": "activatePlugin", - "pluginPath": "z-data-liberation-static-files-editor/plugin.php" } ] } diff --git a/packages/playground/data-liberation-static-files-editor/plugin.php b/packages/playground/data-liberation-static-files-editor/plugin.php index cf869b7eb6..2d0ea48b36 100644 --- a/packages/playground/data-liberation-static-files-editor/plugin.php +++ b/packages/playground/data-liberation-static-files-editor/plugin.php @@ -22,94 +22,6 @@ class WP_Static_Files_Editor_Plugin { static public function register_hooks() { register_activation_hook( __FILE__, array(self::class, 'import_static_pages') ); add_action('save_post', array(self::class, 'on_save_post')); - add_action('init', function() { - $converter = new WP_Blocks_To_Markdown(<<<HTML -<!-- wp:heading {"level":1} --> -<h1>WordPress 6.8 was released</h1> -<!-- /wp:heading --> - -<!-- wp:paragraph --> -<p>Last week, WordPress 6.8 <b>was released</b>. This release includes a new default theme, a new block editor experience, and a new block library. 
It also includes a new block editor experience, and a new block library.</p> -<!-- /wp:paragraph --> - -<!-- wp:list --> -<ul> - <!-- wp:list-item --> - <li>Major Features - <!-- wp:list --> - <ul> - <!-- wp:list-item --> - <li>Block Editor Updates - <!-- wp:list --> - <ul> - <!-- wp:list-item --> - <li>New <code>block patterns</code> added</li> - <!-- /wp:list-item --> - - <!-- wp:list-item --> - <li>Improved performance - - Hey Hey - - <b>More lines</b> - <!-- wp:list --> - <ul> - <!-- wp:list-item --> - <li>New <code>block - Hey Hey - - <b>More lines</b> patterns</code> added</li> - <!-- /wp:list-item --> - <!-- wp:list-item --> - <li>Improved performance</li> - <!-- /wp:list-item --> - </ul> - <!-- /wp:list --> - </li> - <!-- /wp:list-item --> - <!-- wp:list-item --> - <li>Improved performance</li> - <!-- /wp:list-item --> - </ul> - <!-- /wp:list --> - </li> - <!-- /wp:list-item --> - <!-- wp:list-item --> - <li>Improved performance</li> - <!-- /wp:list-item --> - <!-- wp:list-item --> - <li>Improved performance</li> - <!-- /wp:list-item --> - </ul> - <!-- /wp:list --> - </li> - <!-- /wp:list-item --> -</ul> -<!-- /wp:list --> - -<!-- wp:quote --> -<blockquote class="wp-block-quote"> -<!-- wp:code --> -<pre class="wp-block-code"><code>function hello() { - console.log("Hello world!"); -}</code></pre> -<!-- /wp:code --> -</blockquote> -<!-- /wp:quote --> - -<!-- wp:table --> -<figure class="wp-block-table"><table class="has-fixed-layout"> -<thead><tr><th>Header 1</th><th>Header 2</th></tr></thead> -<tbody><tr><td>Cell 1</td><td>Cell 2</td></tr><tr><td>Cell 3</td><td>Cell 4</td></tr></tbody> -</table></figure> -<!-- /wp:table --> - -HTML); - echo '<plaintext>'; - $converter->convert(); - var_dump($converter->get_result()); - die(); - }); } /** @@ -187,7 +99,9 @@ static public function on_save_post($post_id) { } // self::deltree(WP_STATIC_CONTENT_DIR); - mkdir(WP_STATIC_CONTENT_DIR, 0777, true); + if(!is_dir(WP_STATIC_CONTENT_DIR)) { + mkdir(WP_STATIC_CONTENT_DIR, 
0777, true); + } self::save_db_pages_as_html(WP_STATIC_CONTENT_DIR); } diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index 0c8168f3da..7627b23d01 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -81,6 +81,7 @@ require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php'; require_once __DIR__ . '/src/import/WP_Import_HTML_Processor.php'; require_once __DIR__ . '/src/import/WP_Import_Utils.php'; +require_once __DIR__ . '/src/WP_Data_Liberation_HTML_Processor.php'; require_once __DIR__ . '/src/utf8_decoder.php'; // When running in Playground, the composer autoloader script sees CLI SAPI and diff --git a/packages/playground/data-liberation/src/WP_Data_Liberation_HTML_Processor.php b/packages/playground/data-liberation/src/WP_Data_Liberation_HTML_Processor.php new file mode 100644 index 0000000000..c7370f14b5 --- /dev/null +++ b/packages/playground/data-liberation/src/WP_Data_Liberation_HTML_Processor.php @@ -0,0 +1,54 @@ +<?php + +class WP_Data_Liberation_HTML_Processor extends WP_HTML_Processor { + + public function get_inner_html() { + if ( '#tag' !== $this->get_token_type() ) { + return false; + } + + if ( $this->is_tag_closer() ) { + return false; + } + + if ( false === WP_HTML_Tag_Processor::set_bookmark( 'tag-start' ) ) { + return false; + } + + $this->skip_to_closer(); + + if ( false === WP_HTML_Tag_Processor::set_bookmark( 'tag-end' ) ) { + WP_HTML_Tag_Processor::release_bookmark( 'tag-start' ); + return false; + } + + $inner_html_start = $this->bookmarks['tag-start']->start + $this->bookmarks['tag-start']->length; + $inner_html_end = $this->bookmarks['tag-end']->start - $inner_html_start; + + WP_HTML_Tag_Processor::seek( 'tag-start' ); + WP_HTML_Tag_Processor::release_bookmark( 'tag-start' ); + WP_HTML_Tag_Processor::release_bookmark( 'tag-end' ); + + return substr( + $this->html, + $inner_html_start, + 
$inner_html_end + ); + } + + public function skip_to_closer() { + $starting_depth = $this->get_current_depth(); + while ( $this->next_token() ) { + if ( + $this->get_token_type() === '#tag' && + $this->is_tag_closer() && + $this->get_current_depth() === $starting_depth - 1 + ) { + return true; + } + } + + return false; + } + +} diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php index 17f450051c..7c707bd615 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Block_Markup_Entity_Reader.php @@ -72,8 +72,8 @@ public function next_entity() { 'post_meta', array( 'post_id' => $this->post_id, - 'meta_key' => $key, - 'meta_value' => $value, + 'key' => $key, + 'value' => $value, ) ); } diff --git a/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php index 92d47ac27f..aef6041666 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_HTML_Entity_Reader.php @@ -70,8 +70,8 @@ public function next_entity() { 'post_meta', array( 'post_id' => $this->post_id, - 'meta_key' => $key, - 'meta_value' => $value, + 'key' => $key, + 'value' => $value, ) ); } From 63c1f6b2549e7c6ec278eade442705d370a5a304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com> Date: Fri, 20 Dec 2024 20:07:53 +0100 Subject: [PATCH 23/26] Early concept of synchronizing page saves to local files --- .../blueprint.json | 4 + .../plugin.php | 255 ++++++++++-------- .../WP_Filesystem_Entity_Reader.php | 1 + 3 files changed, 154 insertions(+), 106 deletions(-) diff --git 
a/packages/playground/data-liberation-static-files-editor/blueprint.json b/packages/playground/data-liberation-static-files-editor/blueprint.json index 76dcfc3a79..0e3b539bdf 100644 --- a/packages/playground/data-liberation-static-files-editor/blueprint.json +++ b/packages/playground/data-liberation-static-files-editor/blueprint.json @@ -14,6 +14,10 @@ { "step": "activatePlugin", "pluginPath": "z-data-liberation-markdown/plugin.php" + }, + { + "step": "activatePlugin", + "pluginPath": "z-data-liberation-static-files-editor/plugin.php" } ] } diff --git a/packages/playground/data-liberation-static-files-editor/plugin.php b/packages/playground/data-liberation-static-files-editor/plugin.php index 2d0ea48b36..423336b481 100644 --- a/packages/playground/data-liberation-static-files-editor/plugin.php +++ b/packages/playground/data-liberation-static-files-editor/plugin.php @@ -6,7 +6,7 @@ use WordPress\Filesystem\WP_Filesystem; if ( ! defined( 'WP_STATIC_CONTENT_DIR' ) ) { - define( 'WP_STATIC_CONTENT_DIR', WP_CONTENT_DIR . '/uploads/static-pages' ); + define( 'WP_STATIC_CONTENT_DIR', WP_CONTENT_DIR . '/uploads/static-pages' ); } if(isset($_GET['dump'])) { @@ -21,7 +21,8 @@ class WP_Static_Files_Editor_Plugin { static public function register_hooks() { register_activation_hook( __FILE__, array(self::class, 'import_static_pages') ); - add_action('save_post', array(self::class, 'on_save_post')); + add_action('save_post', array(self::class, 'on_save_post'), 10, 3); + add_action('before_delete_post', array(self::class, 'on_delete_post')); } /** @@ -69,139 +70,181 @@ function () { * @TODO: Make it work with MySQL, right now it uses SQLite-specific code. 
*/ static private function reset_db_data() { - $GLOBALS['@pdo']->query('DELETE FROM wp_posts WHERE id > 0'); - $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_posts'"); - - $GLOBALS['@pdo']->query('DELETE FROM wp_postmeta WHERE post_id > 1'); - $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=20 WHERE NAME='wp_postmeta'"); + $GLOBALS['@pdo']->query('DELETE FROM wp_posts WHERE id > 0'); + $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_posts'"); + + $GLOBALS['@pdo']->query('DELETE FROM wp_postmeta WHERE post_id > 1'); + $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=20 WHERE NAME='wp_postmeta'"); - $GLOBALS['@pdo']->query('DELETE FROM wp_comments'); - $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_comments'"); + $GLOBALS['@pdo']->query('DELETE FROM wp_comments'); + $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_comments'"); - $GLOBALS['@pdo']->query('DELETE FROM wp_commentmeta'); - $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_commentmeta'"); + $GLOBALS['@pdo']->query('DELETE FROM wp_commentmeta'); + $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_commentmeta'"); } /** - * Recreate the entire file structure when any post is saved. - * - * Why recreate? - * - * It's easier to recreate the entire file structure than to keep track of - * which files have been added, deleted, renamed and moved under - * another parent, or changed via a direct SQL query. + * Handle post deletion by removing associated files and directories */ - static public function on_save_post($post_id) { - // Prevent collisions between the initial create_db_pages_from_html_files call - // process and the save_post_page hook. 
- if (self::$importing) { + static public function on_delete_post($post_id) { + if (get_post_type($post_id) !== 'page') { return; } - - // self::deltree(WP_STATIC_CONTENT_DIR); - if(!is_dir(WP_STATIC_CONTENT_DIR)) { - mkdir(WP_STATIC_CONTENT_DIR, 0777, true); + + $source_path = get_post_meta($post_id, 'source_path_relative', true); + if (!empty($source_path)) { + $full_path = WP_STATIC_CONTENT_DIR . '/' . $source_path; + $dir_path = dirname($full_path); + + // Delete the file + if (file_exists($full_path)) { + unlink($full_path); + } + + // If this was a parent page with index.md, delete its directory too + if (basename($full_path) === 'index.md') { + self::deltree($dir_path); + } + } + } + + /** + * Recursively delete a directory and its contents + */ + static private function deltree($dir) { + $files = array_diff(scandir($dir), array('.','..')); + foreach ($files as $file) { + $path = "$dir/$file"; + is_dir($path) ? self::deltree($path) : unlink($path); } - self::save_db_pages_as_html(WP_STATIC_CONTENT_DIR); + return rmdir($dir); } - static private function save_db_pages_as_html($path, $parent_id = null) { - if (!file_exists($path)) { - mkdir($path, 0777, true); + /** + * Handle post saving and file organization + */ + static public function on_save_post($post_id, $post, $update) { + if (self::$importing || get_post_type($post_id) !== 'page') { + return; } - $args = array( - 'post_type' => 'page', - 'posts_per_page' => -1, - 'post_parent' => $parent_id, - 'post_status' => 'publish', - ); - $pages = new WP_Query($args); + $parent_id = wp_get_post_parent_id($post_id); + $content_converter = get_post_meta($post_id, 'content_converter', true) ?: 'md'; + $old_relative_path = get_post_meta($post_id, 'source_path_relative', true); + + $new_relative_path = $old_relative_path; + if (empty($new_relative_path)) { + $new_relative_path = sanitize_title($post->post_title) . '.' . 
$content_converter; + } - if ($pages->have_posts()) { - while ($pages->have_posts()) { - $pages->the_post(); - $page_id = get_the_ID(); - $page = get_post($page_id); + // Determine the new relative path + if ($parent_id) { + $parent_file_path = get_post_meta($parent_id, 'source_path_relative', true); - $content_converter = get_post_meta($page_id, 'content_converter', true); - if(empty($content_converter)) { - $content_converter = 'md'; - } + // If parent file exists but isn't in a subdirectory, move it + if(!file_exists(WP_STATIC_CONTENT_DIR . '/' . $parent_file_path)) { + // @TODO: error handling. Maybe just backfill the paths? + throw new Exception('Parent file does not exist: ' . WP_STATIC_CONTENT_DIR . '/' . $parent_file_path); + } - $title_block = ( - WP_Import_Utils::block_opener('heading', array('level' => 1)) . - '<h1>' . esc_html(get_the_title()) . '</h1>' . - WP_Import_Utils::block_closer('heading') + $parent_filename = basename($parent_file_path, '.md'); + if('index' !== $parent_filename) { + $swap_file = $parent_file_path . '.swap'; + rename( + WP_STATIC_CONTENT_DIR . '/' . $parent_file_path, + WP_STATIC_CONTENT_DIR . '/' . $swap_file ); - $block_markup = $title_block . $page->post_content; - - switch($content_converter) { - case 'html': - case 'xhtml': - // @TODO: Implement a Blocks to HTML converter. - break; - case 'md': - default: - $converter = new WP_Blocks_To_Markdown( - $block_markup, - array( - 'title' => get_the_title(), - ) - ); - if(false === $converter->convert()) { - // @TODO: error handling. - } - $content = $converter->get_result(); - break; - } + $parent_dir = dirname($parent_file_path) . '/' . basename($parent_file_path, '.md'); + mkdir(WP_STATIC_CONTENT_DIR . '/' . $parent_dir, 0777, true); - $child_pages = get_pages(array('child_of' => $page_id, 'post_type' => 'page')); + $parent_file_path = $parent_dir . '/index.md'; + rename( + WP_STATIC_CONTENT_DIR . '/' . $swap_file, + WP_STATIC_CONTENT_DIR . '/' . 
$parent_file_path + ); + update_post_meta($parent_id, 'source_path_relative', $parent_file_path); + + $new_relative_path = $parent_dir . '/' . $new_relative_path; + } + } - if (!file_exists($path)) { - mkdir($path, 0777, true); - } + // Handle file moves for existing pages + if (!empty($old_relative_path) && $old_relative_path !== $new_relative_path) { + $old_path = WP_STATIC_CONTENT_DIR . '/' . $old_relative_path; + $new_path = WP_STATIC_CONTENT_DIR . '/' . $new_relative_path; + + // Create parent directory if needed + if (!is_dir(dirname($new_path))) { + mkdir(dirname($new_path), 0777, true); + } - $source_path_relative = get_post_meta($page_id, 'source_path_relative', true); - if(empty($source_path_relative)) { - $title = sanitize_title(get_the_title()); - $source_path_relative = $page->menu_order . '_' . $title . '.' . $content_converter; - } - $source_file_path = WP_STATIC_CONTENT_DIR . '/' . $source_path_relative; - if (!empty($child_pages)) { - if(is_dir($source_file_path)) { - $dirname = $source_file_path; - } else { - $dirname = dirname($source_file_path); - mkdir($dirname, 0777, true); - } - file_put_contents($source_file_path . '/index.' . 
$content_converter, $content); - self::save_db_pages_as_html($dirname, $page_id); - } else { - file_put_contents($source_file_path, $content); - } + // Move the file/directory + if (file_exists($old_path)) { + rename($old_path, $new_path); } + + // Clean up empty directories + // $old_dir = dirname($old_path); + // if (is_dir($old_dir) && !(new \FilesystemIterator($old_dir))->valid()) { + // rmdir($old_dir); + // } + // Update the source path meta } - wp_reset_postdata(); + + update_post_meta($post_id, 'source_path_relative', $new_relative_path); + // Save the content + self::save_page_content($post_id); } - static private function deltree($path) { - if (!file_exists($path)) { - return; + /** + * Save a single page's content to file + */ + static private function save_page_content($page_id) { + $page = get_post($page_id); + $content_converter = get_post_meta($page_id, 'content_converter', true) ?: 'md'; + + $title_block = ( + WP_Import_Utils::block_opener('heading', array('level' => 1)) . + '<h1>' . esc_html(get_the_title($page_id)) . '</h1>' . + WP_Import_Utils::block_closer('heading') + ); + $block_markup = $title_block . $page->post_content; + + switch($content_converter) { + case 'html': + case 'xhtml': + // @TODO: Implement a Blocks to HTML converter – OR just render + // the blocks. + break; + case 'md': + default: + $converter = new WP_Blocks_To_Markdown( + $block_markup, + array( + 'title' => get_the_title($page_id), + ) + ); + if(false === $converter->convert()) { + // @TODO: error handling. 
+ return; + } + $content = $converter->get_result(); + break; } - $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($path), RecursiveIteratorIterator::CHILD_FIRST); - foreach ($iterator as $file) { - /** @var SplFileInfo $file */ - if ($file->isDir()) { - rmdir($file->getRealPath()); - } else if($file->isFile()) { - unlink($file->getRealPath()); + $source_path_relative = get_post_meta($page_id, 'source_path_relative', true); + if($source_path_relative) { + $source_file_path = WP_STATIC_CONTENT_DIR . '/' . $source_path_relative; + + // Ensure directory exists + if (!is_dir(dirname($source_file_path))) { + mkdir(dirname($source_file_path), 0777, true); } - } - rmdir($path); + // Save the content + file_put_contents($source_file_path, $content); + } } } diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php index 5837bbdc7b..668ea185a5 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php @@ -92,6 +92,7 @@ public function next_entity(): bool { $data['guid'] = $post_tree_node['source_path_relative']; $data['post_parent'] = $post_tree_node['parent_id']; $data['post_title'] = $data['post_title'] ?? null; + $data['post_status'] = 'publish'; $data['post_type'] = 'page'; if ( ! 
$data['post_title'] ) { $data['post_title'] = WP_Import_Utils::slug_to_title( basename( $post_tree_node['source_path_relative'] ) ); From 9b65f9214ae37b6b92760476af77fb43878a471b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com> Date: Sun, 22 Dec 2024 19:34:01 +0100 Subject: [PATCH 24/26] Propagate WP Admin changes to the local disk directory --- .../src/lib/steps/activate-plugin.ts | 117 ++++- .../WP_Static_File_Sync.php | 492 ++++++++++++++++++ .../blueprint.json | 1 + .../phpunit.xml | 14 + .../plugin.php | 239 +++------ .../run.sh | 3 + .../tests/WPStaticFileSyncTests.php | 125 +++++ .../tests/bootstrap.php | 8 + .../pride-and-prejudice/index.md | 1 + .../playground/data-liberation/bootstrap.php | 14 + .../WP_Directory_Tree_Entity_Reader.php | 24 +- .../WP_Filesystem_Entity_Reader.php | 17 +- .../WP_Filesystem_To_Post_Tree.php | 19 +- .../data-liberation/src/functions.php | 12 + .../src/import/WP_Entity_Importer.php | 1 - .../src/import/WP_Stream_Importer.php | 4 +- .../tests/WPHTMLEntityReaderTests.php | 7 +- .../tests/WPHTMLToBlocksTests.php | 4 +- 18 files changed, 867 insertions(+), 235 deletions(-) create mode 100644 packages/playground/data-liberation-static-files-editor/WP_Static_File_Sync.php create mode 100644 packages/playground/data-liberation-static-files-editor/phpunit.xml create mode 100644 packages/playground/data-liberation-static-files-editor/tests/WPStaticFileSyncTests.php create mode 100644 packages/playground/data-liberation-static-files-editor/tests/bootstrap.php create mode 100644 packages/playground/data-liberation-static-files-editor/tests/static-files-tests/pride-and-prejudice/index.md diff --git a/packages/playground/blueprints/src/lib/steps/activate-plugin.ts b/packages/playground/blueprints/src/lib/steps/activate-plugin.ts index 0d08958340..706b0a8211 100644 --- a/packages/playground/blueprints/src/lib/steps/activate-plugin.ts +++ b/packages/playground/blueprints/src/lib/steps/activate-plugin.ts @@ 
-1,7 +1,5 @@ -import { phpVar } from '@php-wasm/util'; import { StepHandler } from '.'; import { logger } from '@php-wasm/logger'; - /** * @inheritDoc activatePlugin * @example @@ -39,18 +37,18 @@ export const activatePlugin: StepHandler<ActivatePluginStep> = async ( progress?.tracker.setCaption(`Activating ${pluginName || pluginPath}`); const docroot = await playground.documentRoot; - const result = await playground.run({ + const activatePluginResult = await playground.run({ code: `<?php define( 'WP_ADMIN', true ); - require_once( ${phpVar(docroot)}. "/wp-load.php" ); - require_once( ${phpVar(docroot)}. "/wp-admin/includes/plugin.php" ); + require_once( getenv('DOCROOT') . "/wp-load.php" ); + require_once( getenv('DOCROOT') . "/wp-admin/includes/plugin.php" ); // Set current user to admin wp_set_current_user( get_users(array('role' => 'Administrator') )[0]->ID ); - $plugin_path = ${phpVar(pluginPath)}; + $plugin_path = getenv('PLUGIN_PATH'); $response = false; - if (!is_dir($plugin_path)) { + if ( ! is_dir( $plugin_path)) { $response = activate_plugin($plugin_path); } @@ -65,22 +63,101 @@ export const activatePlugin: StepHandler<ActivatePluginStep> = async ( } } - if ( null === $response ) { - die('Plugin activated successfully'); - } else if ( is_wp_error( $response ) ) { - throw new Exception( $response->get_error_message() ); + if ( is_wp_error($response) ) { + die( $response->get_error_message() ); + } else if ( false === $response ) { + die( "The activatePlugin step wasn't able to find the plugin $plugin_path." ); } - - throw new Exception( 'Unable to activate plugin' ); `, + env: { + PLUGIN_PATH: pluginPath, + DOCROOT: docroot, + }, }); - if (result.text !== 'Plugin activated successfully') { - logger.debug(result); - throw new Error( - `Plugin ${pluginPath} could not be activated – WordPress exited with no error. ` + - `Sometimes, when $_SERVER or site options are not configured correctly, ` + - `WordPress exits early with a 301 redirect. 
` + - `Inspect the "debug" logs in the console for more details` + if (activatePluginResult.text) { + logger.warn( + `Plugin ${pluginPath} activation printed the following bytes: ${activatePluginResult.text}` ); } + + /** + * Instead of checking the plugin activation response, + * check if the plugin is active by looking at the active plugins list. + * + * We have to split the activation and the check into two PHP runs + * because some plugins might redirect during activation, + * which would prevent any output that happens after activation from being returned. + * + * Relying on the plugin activation response is not reliable because if the plugin activation + * produces any output, WordPress will assume it's an activation error and return a WP_Error. + * WordPress will still activate the plugin and load the required page, + * but it will also show the error as a notice in wp-admin. + * See WordPress source code for more details: + * https://github.com/WordPress/wordpress-develop/blob/6.7/src/wp-admin/includes/plugin.php#L733 + * + * Because some plugins can create an output, we need to use output buffering + * to ensure the 'true' response is not polluted by other outputs. + * If the plugin activation fails, we will return the buffered output as it might + * contain more information about the failure. + */ + const isActiveCheckResult = await playground.run({ + code: `<?php + ob_start(); + require_once( getenv( 'DOCROOT' ) . "/wp-load.php" ); + + /** + * Extracts the relative plugin path from either an absolute or relative plugin path. + * + * Absolute paths starting with plugin directory (e.g., '/wordpress/wp-content/plugins/test-plugin/index.php') + * should be converted to relative paths (e.g., 'test-plugin/index.php') + * + * Directories should finish with a trailing slash to ensure we match the full plugin directory name. 
+ * + * Examples: + * - '/wordpress/wp-content/plugins/test-plugin/index.php' → 'test-plugin/index.php' + * - '/wordpress/wp-content/plugins/test-plugin/' → 'test-plugin/' + * - '/wordpress/wp-content/plugins/test-plugin' → 'test-plugin/' + * - 'test-plugin/index.php' → 'test-plugin/index.php' + * - 'test-plugin/' → 'test-plugin/' + * - 'test-plugin' → 'test-plugin/' + */ + $plugin_directory = WP_PLUGIN_DIR . '/'; + $relative_plugin_path = getenv( 'PLUGIN_PATH' ); + if (strpos($relative_plugin_path, $plugin_directory) === 0) { + $relative_plugin_path = substr($relative_plugin_path, strlen($plugin_directory)); + } + + if ( is_dir( $plugin_directory . $relative_plugin_path ) ) { + $relative_plugin_path = rtrim( $relative_plugin_path, '/' ) . '/'; + } + + $active_plugins = get_option( 'active_plugins' ); + foreach ( $active_plugins as $plugin ) { + if ( substr( $plugin, 0, strlen( $relative_plugin_path ) ) === $relative_plugin_path ) { + ob_end_clean(); + die( 'true' ); + } + } + die( ob_get_flush() ?: 'false' ); + `, + env: { + DOCROOT: docroot, + PLUGIN_PATH: pluginPath, + }, + }); + + if (isActiveCheckResult.text === 'true') { + // Plugin activation was successful, yay! + return; + } + + if (isActiveCheckResult.text !== 'false') { + logger.debug(isActiveCheckResult.text); + } + throw new Error( + `Plugin ${pluginPath} could not be activated – WordPress exited with no error. ` + + `Sometimes, when $_SERVER or site options are not configured correctly, ` + + `WordPress exits early with a 301 redirect. 
` + + `Inspect the "debug" logs in the console for more details.` + ); }; diff --git a/packages/playground/data-liberation-static-files-editor/WP_Static_File_Sync.php b/packages/playground/data-liberation-static-files-editor/WP_Static_File_Sync.php new file mode 100644 index 0000000000..b4ebd5e696 --- /dev/null +++ b/packages/playground/data-liberation-static-files-editor/WP_Static_File_Sync.php @@ -0,0 +1,492 @@ +<?php + +use WordPress\Filesystem\WP_Filesystem; + +class WP_Static_File_Sync +{ + /** + * @var string The post type to sync files for. + */ + private $post_type; + + /** + * @var array The previous post data before it was saved. + */ + private $previous_post; + + /** + * @var WP_Filesystem The filesystem to manage the static files in. + */ + private WP_Filesystem $filesystem; + + private $last_error = null; + private $index_file_pattern; + private $default_index_filename; + + /** + * Initialize the file sync manager + * + * @param WP_Static_Page_Manager $page_manager Manager for static page files + * @param array $options { + * Optional. Configuration options for the sync manager. + * + * @type string $post_type The post type to sync files for.. + * } + */ + public function __construct( + WP_Filesystem $filesystem, + $options = [] + ) { + $this->filesystem = $filesystem; + $this->post_type = $options['post_type'] ?? WP_LOCAL_FILE_POST_TYPE; + $this->index_file_pattern = $options['index_file_pattern'] ?? '/^index\.\w+$/'; + $this->default_index_filename = $options['default_index_filename'] ?? 'index.md'; + } + + public function initialize_sync() { + add_action('pre_post_update', [$this, 'cache_previous_post']); + add_action('save_post', [$this, 'on_save_post'], 10, 3); + add_action('delete_post', [$this, 'on_delete_post']); + } + + public function deinitialize_sync() { + // @TODO: Confirm we don't have to preserve the original $callback + // array for remove_action() to work. 
+ remove_action('pre_post_update', [$this, 'cache_previous_post']); + remove_action('save_post', [$this, 'on_save_post'], 10, 3); + remove_action('delete_post', [$this, 'on_delete_post']); + } + + /** + * Cache the post data before it gets updated + */ + public function cache_previous_post($post_id) { + $this->previous_post = get_post($post_id, ARRAY_A); + } + + /** + * Handle saving of a post or page. + */ + public function on_save_post(int $post_id, WP_Post $post, bool $update): void + { + if (!$this->wordpress_ready_for_sync()) { + return; + } + + if ( + empty($post->ID) || + $post->post_status !== 'publish' || + $post->post_type !== $this->post_type + ) { + return; + } + + try { + // Ensure the parent directory exists. + if($post->post_parent) { + $parent_file_path_before = get_post_meta($post->post_parent, 'local_file_path', true); + $parent_file_path_after = $this->ensure_is_directory_index($parent_file_path_before); + if(false === $parent_file_path_after) { + $this->bail( + 'failed_to_ensure_parent_directory_index', + 'Failed to ensure parent directory index for post ' . $post->post_parent + ); + return; + } + if($parent_file_path_after !== $parent_file_path_before) { + update_post_meta($post->post_parent, 'local_file_path', $parent_file_path_after); + } + $parent_dir = dirname($parent_file_path_after); + } else { + $parent_dir = '/'; + } + + // @TODO: Handle creation of a new post + + // Figure out the new local file path of the updated page. + $has_children = !!get_posts([ + 'post_type' => $this->post_type, + 'post_parent' => $post->ID, + 'numberposts' => 1, + 'fields' => 'ids' + ]); + $parent_changed = $post->post_parent !== $this->previous_post['post_parent']; + $local_path_before = get_post_meta($post_id, 'local_file_path', true) ?? ''; + if($has_children && $parent_changed) { + // Move the entire directory subtree to the new parent. 
+ $local_path_to_move_from = dirname($local_path_before); + $local_path_to_move_to = $this->append_unique_suffix( + wp_join_paths($parent_dir, basename($local_path_to_move_from)) + ); + if(false === $this->filesystem->rename($local_path_to_move_from, $local_path_to_move_to)) { + $this->bail('failed_to_rename_file', 'Failed to rename file: ' . $local_path_to_move_from); + return; + } + update_post_meta($post_id, 'local_file_path', $local_path_to_move_to); + $local_path_changed = true; + $local_path_after = wp_join_paths($local_path_to_move_to, basename($local_path_before)); + } else { + $filename_after = $has_children ? 'index' : sanitize_title($post->post_name); + + $extension = pathinfo($local_path_before, PATHINFO_EXTENSION) ?: 'md'; + if($extension) { + $filename_after .= '.' . $extension; + } + + $local_path_after = wp_join_paths($parent_dir, $filename_after); + $local_path_changed = !$local_path_before || $local_path_before !== $local_path_after; + if($local_path_changed) { + $local_path_after = $this->append_unique_suffix($local_path_after); + } + + $success = $this->filesystem->put_contents( + $local_path_after, + $this->convert_content($post_id) + ); + if(false === $success) { + $this->bail('failed_to_create_file', 'Failed to create file: ' . $local_path_after); + return; + } + } + if($local_path_changed) { + if($local_path_before) { + $this->filesystem->rm($local_path_before); + } + update_post_meta($post_id, 'local_file_path', $local_path_after); + } + + // If we're moving the page under a new parent, flatten the old parent's + // directory if it now contains only the index file. 
+ if($parent_changed && $this->previous_post['post_parent']) { + $old_parent_prev_path = get_post_meta($this->previous_post['post_parent'], 'local_file_path', true); + $old_parent_new_path = $this->flatten_parent_if_needed($old_parent_prev_path); + if($old_parent_new_path) { + update_post_meta($this->previous_post['post_parent'], 'local_file_path', $old_parent_new_path); + } + } + + // If the page we just updated was a parent node itself, update, the local_file_path + // meta of its entire subtree. + if($this->previous_post['post_parent']) { + $this->update_indexed_local_file_paths_for_children($post->ID); + } + } catch(Exception $e) { + // @TODO: Handle failures gracefully. + var_dump($e->getMessage()); + var_dump($e->getTraceAsString()); + throw $e; + } + } + + private function update_indexed_local_file_paths_for_children($parent_id) { + $children = get_posts([ + 'post_type' => $this->post_type, + 'post_parent' => $parent_id, + ]); + if(empty($children)) { + return; + } + $parent_new_file_path = get_post_meta($parent_id, 'local_file_path', true); + foreach($children as $child) { + $child_local_path_before = get_post_meta($child->ID, 'local_file_path', true); + $child_local_path_after = dirname($parent_new_file_path) . '/' . basename($child_local_path_before); + update_post_meta($child->ID, 'local_file_path', $child_local_path_after); + $this->update_indexed_local_file_paths_for_children($child->ID); + } + } + + /** + * Handle deletion of a post or page. + */ + public function on_delete_post(int $post_id): void + { + if (!$this->wordpress_ready_for_sync()) { + return; + } + if (!$post_id) { + return; + } + $post = get_post($post_id); + if ( + $post->post_status !== 'publish' || + $post->post_type !== $this->post_type + ) { + return; + } + + $relative_path = get_post_meta($post_id, 'local_file_path', true); + if (! $relative_path ) { + return; + } + + if (! 
$this->filesystem->exists($relative_path)) { + return; + } + + $this->delete_page($relative_path); + } + + private function wordpress_ready_for_sync(): bool { + // Ignore auto-saves or revisions + if (defined('DOING_AUTOSAVE') && DOING_AUTOSAVE) { + return false; + } + + // Skip if in maintenance mode + if (wp_is_maintenance_mode()) { + return false; + } + + if (defined('WP_IMPORTING') && WP_IMPORTING) { + return false; + } + + return true; + } + + private function convert_content( $page_id ) { + $page = get_post($page_id); + if(!$page) { + return ''; + } + + $content_converter = get_post_meta($page_id, 'content_converter', true) ?: 'md'; + + $title_block = ( + WP_Import_Utils::block_opener('heading', array('level' => 1)) . + '<h1>' . esc_html(get_the_title($page_id)) . '</h1>' . + WP_Import_Utils::block_closer('heading') + ); + $block_markup = $title_block . $page->post_content; + + switch($content_converter) { + case 'html': + case 'xhtml': + // @TODO: Implement a Blocks to HTML converter – OR just render + // the blocks. + break; + case 'md': + default: + $converter = new WP_Blocks_To_Markdown( + $block_markup, + array( + 'title' => get_the_title($page_id), + ) + ); + if(false === $converter->convert()) { + // @TODO: error handling. + return; + } + return $converter->get_result(); + break; + } + } + + public function get_last_error() + { + return $this->last_error; + } + + /** + * Ensure the given path is a directory index (e.g., becomes `/index.{extension}`). + */ + public function ensure_is_directory_index(string $path) + { + // If we're given a directory, ensure it has an index file. + if ($this->filesystem->is_dir($path)) { + $index_file = $this->find_index_file($path); + if (!$index_file) { + // Default to Markdown. @TODO: Make this configurable. 
+ $index_file = wp_join_paths($path, $this->default_index_filename); + $this->filesystem->put_contents($index_file, ''); // Create an empty index file + } + return $index_file; + } + + // If we're given a file, create a parent directory with the + // same name (without the extension) and move the file inside + // as its index file. + if ($this->filesystem->is_file($path)) { + // If the file is already an index file, we're done. + if($this->is_index_file($path)) { + return $path; + } + + // @TODO: Handle a file with no extension. + $extension = pathinfo($path, PATHINFO_EXTENSION); + + $swap_path = $path; + if ( $extension ) { + $new_dir = $this->remove_extension($path); + } else { + $new_dir = $path; + /** + * When the file has no extension, $new_dir is the same as $path. + * We need to rename the file to a unique name to avoid collisions. + */ + $swap_path = $this->append_unique_suffix($path); + if(!$this->filesystem->rename($path, $swap_path)) { + $this->bail('failed_to_rename_file', 'Failed to rename file: ' . $path); + return false; + } + } + + if(!$this->filesystem->mkdir($new_dir)) { + $this->bail('failed_to_create_directory', 'Failed to create directory: ' . $new_dir); + return false; + } + + $new_filename = $this->remove_extension($this->default_index_filename); + if ($extension) { + $new_filename .= ".{$extension}"; + } + + $index_file = wp_join_paths($new_dir, $new_filename); + if(!$this->filesystem->rename($swap_path, $index_file)) { + $this->bail('failed_to_rename_file', 'Failed to rename file: ' . $path); + return false; + } + return $index_file; + } + + $this->bail('path_not_found', 'Path does not exist: ' . $path); + return false; + } + + /** + * Flatten a parent directory if it only contains an `index` file. 
+ */ + public function flatten_parent_if_needed(string $directory_index_path): bool + { + if ($this->filesystem->is_file($directory_index_path)) { + $parent_dir = dirname($directory_index_path); + } else if ($this->filesystem->is_dir($directory_index_path)) { + $parent_dir = $directory_index_path; + } else { + return $directory_index_path; + } + + if(!$parent_dir || $parent_dir === '/') { + return $directory_index_path; + } + + $files = $this->filesystem->ls($parent_dir); + if(count($files) === 0) { + $this->filesystem->rmdir($parent_dir); + return $directory_index_path; + } + + // Can't flatten if there are more than one file in the parent directory. + if (count($files) !== 1) { + return $directory_index_path; + } + + if ($this->filesystem->is_dir($directory_index_path)) { + $directory_index_path = $directory_index_path . '/' . $files[0]; + } + + if($this->is_index_file($files[0])) { + // If the directory index is an index file, rename it from "index" + // to the parent directory name + $extension = pathinfo($directory_index_path, PATHINFO_EXTENSION); + $new_filename = basename($parent_dir); + if ($extension) { + $new_filename .= ".{$extension}"; + } + } else { + // If the directory index is not an index file, keep its name + $new_filename = $files[0]; + } + + $new_path = $this->append_unique_suffix( + wp_join_paths(dirname($parent_dir), $new_filename) + ); + if (!$this->filesystem->rename($directory_index_path, $new_path)) { + $this->bail('failed_to_rename_file', 'Failed to rename file: ' . $directory_index_path . ' to ' . $new_path); + return false; + } + + if (!$this->filesystem->rmdir($parent_dir)) { + $this->bail('failed_to_delete_directory', 'Failed to delete directory: ' . $parent_dir); + return false; + } + + return $new_path; + } + + /** + * Delete a file and remove its parent directory if it becomes empty. 
+ */ + private function delete_page(string $path): bool + { + if (!$this->filesystem->is_file($path)) { + $this->bail('path_not_found', 'Path does not exist: ' . $path); + return false; + } + + // Delete the file + if(!$this->filesystem->rm($path)) { + $this->bail('failed_to_delete_file', 'Failed to delete file: ' . $path); + return false; + } + + return $this->flatten_parent_if_needed($path); + } + + /** + * Append a unique suffix to a file path to avoid collisions. + */ + private function append_unique_suffix(string $path): string + { + $dir = dirname($path); + $filename = basename($path); + $extension = pathinfo($filename, PATHINFO_EXTENSION); + $name = pathinfo($filename, PATHINFO_FILENAME); + + $new_path = $path; + $counter = 1; + while ($this->filesystem->exists($new_path)) { + $new_filename = $name . "-{$counter}"; + if ($extension) { + $new_filename .= "." . $extension; + } + $new_path = wp_join_paths($dir, $new_filename); + $counter++; + } + return $new_path; + } + + /** + * Find the index file in a directory. + * + * @TODO: Make configurable. 
+ */ + private function find_index_file(string $directory): ?string + { + $files = $this->filesystem->ls($directory); + foreach ($files as $file) { + if ($this->is_index_file($file)) { + return $file; + } + } + return null; + } + + private function is_index_file(string $path): bool + { + return preg_match($this->index_file_pattern, basename($path)); + } + + private function remove_extension(string $path): string + { + $extension = pathinfo($path, PATHINFO_EXTENSION); + return substr($path, 0, -strlen(".{$extension}")); + } + + private function bail($code, $message) { + throw new Exception("$code: $message"); + // $this->last_error = new WP_Error($code, $message); + // return false; + } + +} diff --git a/packages/playground/data-liberation-static-files-editor/blueprint.json b/packages/playground/data-liberation-static-files-editor/blueprint.json index 0e3b539bdf..0ec3011920 100644 --- a/packages/playground/data-liberation-static-files-editor/blueprint.json +++ b/packages/playground/data-liberation-static-files-editor/blueprint.json @@ -1,6 +1,7 @@ { "$schema": "../blueprints/public/blueprint-schema.json", "login": true, + "landingPage": "/wp-admin/edit.php?post_type=local_file", "constants": { "WP_DEBUG": true, "WP_DEBUG_LOG": true, diff --git a/packages/playground/data-liberation-static-files-editor/phpunit.xml b/packages/playground/data-liberation-static-files-editor/phpunit.xml new file mode 100644 index 0000000000..e054955f90 --- /dev/null +++ b/packages/playground/data-liberation-static-files-editor/phpunit.xml @@ -0,0 +1,14 @@ +<?xml version="1.0"?> +<phpunit + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + bootstrap="tests/bootstrap.php" + colors="true" + xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd" + cacheDirectory=".phpunit.cache" +> + <testsuites> + <testsuite name="Application Test Suite"> + <file>tests/WPStaticFileSyncTests.php</file> + </testsuite> + </testsuites> +</phpunit> diff --git 
a/packages/playground/data-liberation-static-files-editor/plugin.php b/packages/playground/data-liberation-static-files-editor/plugin.php index 423336b481..32f260fc64 100644 --- a/packages/playground/data-liberation-static-files-editor/plugin.php +++ b/packages/playground/data-liberation-static-files-editor/plugin.php @@ -9,20 +9,46 @@ define( 'WP_STATIC_CONTENT_DIR', WP_CONTENT_DIR . '/uploads/static-pages' ); } +if( ! defined( 'WP_LOCAL_FILE_POST_TYPE' )) { + define( 'WP_LOCAL_FILE_POST_TYPE', 'local_file' ); +} + if(isset($_GET['dump'])) { add_action('init', function() { WP_Static_Files_Editor_Plugin::import_static_pages(); }); } -class WP_Static_Files_Editor_Plugin { +require_once __DIR__ . '/WP_Static_File_Sync.php'; - private static $importing = false; +class WP_Static_Files_Editor_Plugin { static public function register_hooks() { + $fs = new WP_Filesystem( WP_STATIC_CONTENT_DIR ); + $static_sync = new WP_Static_File_Sync( $fs ); + $static_sync->initialize_sync(); + register_activation_hook( __FILE__, array(self::class, 'import_static_pages') ); - add_action('save_post', array(self::class, 'on_save_post'), 10, 3); - add_action('before_delete_post', array(self::class, 'on_delete_post')); + + add_action('init', function() { + self::register_post_type(); + }); + + add_filter('manage_local_file_posts_columns', function($columns) { + $columns['local_file_path'] = 'Local File Path'; + return $columns; + }); + + add_action('manage_local_file_posts_custom_column', function($column_name, $post_id) use ($fs) { + if ($column_name === 'local_file_path') { + $local_file_path = get_post_meta($post_id, 'local_file_path', true); + echo esc_html($local_file_path); + if(!$fs->is_file($local_file_path)) { + echo ' <span style="color: red;">(missing)</span>'; + } + } + }, 10, 2); + } /** @@ -33,10 +59,12 @@ static public function import_static_pages() { return; } - if ( self::$importing ) { + if ( defined('WP_IMPORTING') && WP_IMPORTING ) { return; } - self::$importing = true; 
+ define('WP_IMPORTING', true); + + self::register_post_type(); // Prevent ID conflicts self::reset_db_data(); @@ -44,8 +72,10 @@ static public function import_static_pages() { $importer = WP_Stream_Importer::create( function () { return new WP_Filesystem_Entity_Reader( - new WP_Filesystem(), - WP_STATIC_CONTENT_DIR + new WP_Filesystem(WP_STATIC_CONTENT_DIR), + array( + 'post_type' => WP_LOCAL_FILE_POST_TYPE, + ) ); }, array(), @@ -60,8 +90,36 @@ function () { ); data_liberation_import_step( $import_session, $importer ); + } - self::$importing = false; + static private function register_post_type() { + register_post_type(WP_LOCAL_FILE_POST_TYPE, array( + 'labels' => array( + 'name' => 'Local Files', + 'singular_name' => 'Local File', + 'add_new' => 'Add New', + 'add_new_item' => 'Add New Local File', + 'edit_item' => 'Edit Local File', + 'new_item' => 'New Local File', + 'view_item' => 'View Local File', + 'search_items' => 'Search Local Files', + 'not_found' => 'No local files found', + 'not_found_in_trash' => 'No local files found in Trash', + ), + 'public' => true, + 'show_ui' => true, + 'show_in_menu' => true, + 'hierarchical' => true, + 'supports' => array( + 'title', + 'editor', + 'page-attributes', + 'revisions', + 'custom-fields' + ), + 'has_archive' => false, + 'show_in_rest' => true, + )); } /** @@ -83,169 +141,6 @@ static private function reset_db_data() { $GLOBALS['@pdo']->query("UPDATE SQLITE_SEQUENCE SET SEQ=0 WHERE NAME='wp_commentmeta'"); } - /** - * Handle post deletion by removing associated files and directories - */ - static public function on_delete_post($post_id) { - if (get_post_type($post_id) !== 'page') { - return; - } - - $source_path = get_post_meta($post_id, 'source_path_relative', true); - if (!empty($source_path)) { - $full_path = WP_STATIC_CONTENT_DIR . '/' . 
$source_path; - $dir_path = dirname($full_path); - - // Delete the file - if (file_exists($full_path)) { - unlink($full_path); - } - - // If this was a parent page with index.md, delete its directory too - if (basename($full_path) === 'index.md') { - self::deltree($dir_path); - } - } - } - - /** - * Recursively delete a directory and its contents - */ - static private function deltree($dir) { - $files = array_diff(scandir($dir), array('.','..')); - foreach ($files as $file) { - $path = "$dir/$file"; - is_dir($path) ? self::deltree($path) : unlink($path); - } - return rmdir($dir); - } - - /** - * Handle post saving and file organization - */ - static public function on_save_post($post_id, $post, $update) { - if (self::$importing || get_post_type($post_id) !== 'page') { - return; - } - - $parent_id = wp_get_post_parent_id($post_id); - $content_converter = get_post_meta($post_id, 'content_converter', true) ?: 'md'; - $old_relative_path = get_post_meta($post_id, 'source_path_relative', true); - - $new_relative_path = $old_relative_path; - if (empty($new_relative_path)) { - $new_relative_path = sanitize_title($post->post_title) . '.' . $content_converter; - } - - // Determine the new relative path - if ($parent_id) { - $parent_file_path = get_post_meta($parent_id, 'source_path_relative', true); - - // If parent file exists but isn't in a subdirectory, move it - if(!file_exists(WP_STATIC_CONTENT_DIR . '/' . $parent_file_path)) { - // @TODO: error handling. Maybe just backfill the paths? - throw new Exception('Parent file does not exist: ' . WP_STATIC_CONTENT_DIR . '/' . $parent_file_path); - } - - $parent_filename = basename($parent_file_path, '.md'); - if('index' !== $parent_filename) { - $swap_file = $parent_file_path . '.swap'; - rename( - WP_STATIC_CONTENT_DIR . '/' . $parent_file_path, - WP_STATIC_CONTENT_DIR . '/' . $swap_file - ); - - $parent_dir = dirname($parent_file_path) . '/' . basename($parent_file_path, '.md'); - mkdir(WP_STATIC_CONTENT_DIR . '/' . 
$parent_dir, 0777, true); - - $parent_file_path = $parent_dir . '/index.md'; - rename( - WP_STATIC_CONTENT_DIR . '/' . $swap_file, - WP_STATIC_CONTENT_DIR . '/' . $parent_file_path - ); - update_post_meta($parent_id, 'source_path_relative', $parent_file_path); - - $new_relative_path = $parent_dir . '/' . $new_relative_path; - } - } - - // Handle file moves for existing pages - if (!empty($old_relative_path) && $old_relative_path !== $new_relative_path) { - $old_path = WP_STATIC_CONTENT_DIR . '/' . $old_relative_path; - $new_path = WP_STATIC_CONTENT_DIR . '/' . $new_relative_path; - - // Create parent directory if needed - if (!is_dir(dirname($new_path))) { - mkdir(dirname($new_path), 0777, true); - } - - // Move the file/directory - if (file_exists($old_path)) { - rename($old_path, $new_path); - } - - // Clean up empty directories - // $old_dir = dirname($old_path); - // if (is_dir($old_dir) && !(new \FilesystemIterator($old_dir))->valid()) { - // rmdir($old_dir); - // } - // Update the source path meta - } - - update_post_meta($post_id, 'source_path_relative', $new_relative_path); - // Save the content - self::save_page_content($post_id); - } - - /** - * Save a single page's content to file - */ - static private function save_page_content($page_id) { - $page = get_post($page_id); - $content_converter = get_post_meta($page_id, 'content_converter', true) ?: 'md'; - - $title_block = ( - WP_Import_Utils::block_opener('heading', array('level' => 1)) . - '<h1>' . esc_html(get_the_title($page_id)) . '</h1>' . - WP_Import_Utils::block_closer('heading') - ); - $block_markup = $title_block . $page->post_content; - - switch($content_converter) { - case 'html': - case 'xhtml': - // @TODO: Implement a Blocks to HTML converter – OR just render - // the blocks. - break; - case 'md': - default: - $converter = new WP_Blocks_To_Markdown( - $block_markup, - array( - 'title' => get_the_title($page_id), - ) - ); - if(false === $converter->convert()) { - // @TODO: error handling. 
- return; - } - $content = $converter->get_result(); - break; - } - - $source_path_relative = get_post_meta($page_id, 'source_path_relative', true); - if($source_path_relative) { - $source_file_path = WP_STATIC_CONTENT_DIR . '/' . $source_path_relative; - - // Ensure directory exists - if (!is_dir(dirname($source_file_path))) { - mkdir(dirname($source_file_path), 0777, true); - } - - // Save the content - file_put_contents($source_file_path, $content); - } - } } WP_Static_Files_Editor_Plugin::register_hooks(); diff --git a/packages/playground/data-liberation-static-files-editor/run.sh b/packages/playground/data-liberation-static-files-editor/run.sh index d33cb2b3e1..a68be80ce3 100644 --- a/packages/playground/data-liberation-static-files-editor/run.sh +++ b/packages/playground/data-liberation-static-files-editor/run.sh @@ -1,5 +1,8 @@ #!/bin/bash +rm -rf ./my-notes/workdir/* +cp -r ./my-notes/safe-copy/* ./my-notes/workdir/ + bun ../cli/src/cli.ts \ server \ --mount=../data-liberation-static-files-editor:/wordpress/wp-content/plugins/z-data-liberation-static-files-editor \ diff --git a/packages/playground/data-liberation-static-files-editor/tests/WPStaticFileSyncTests.php b/packages/playground/data-liberation-static-files-editor/tests/WPStaticFileSyncTests.php new file mode 100644 index 0000000000..6b312b812f --- /dev/null +++ b/packages/playground/data-liberation-static-files-editor/tests/WPStaticFileSyncTests.php @@ -0,0 +1,125 @@ +<?php + +use PHPUnit\Framework\TestCase; +use WordPress\Filesystem\WP_Filesystem; + +class WPStaticFileSyncTests extends TestCase { + + private $filesystem; + + public function setUp(): void { + $this->filesystem = new WP_Filesystem(__DIR__ . 
'/static-files-tests/'); + } + + public function tearDown(): void { + // $this->filesystem->rmdir('/', ['recursive' => true]); + } + + public function test_flatten_parent_if_needed_moves_lone_file_one_level_up() { + $this->setup_directory_tree([ + 'pride-and-prejudice' => [ + 'index.md' => 'Test parent', + ], + ]); + + $sync = new WP_Static_File_Sync($this->filesystem); + $this->assertTrue($sync->flatten_parent_if_needed('/pride-and-prejudice/index.md')); + + $fs = $this->filesystem; + $this->assertFalse($fs->exists('/pride-and-prejudice')); + $this->assertTrue($fs->is_file('/pride-and-prejudice.md')); + } + + public function test_flatten_parent_if_needed_does_not_move_file_if_parent_is_not_empty() { + $this->setup_directory_tree([ + 'pride-and-prejudice' => [ + 'index.md' => 'Test parent', + 'other.md' => 'Test other', + ], + ]); + + $sync = new WP_Static_File_Sync($this->filesystem); + $this->assertTrue($sync->flatten_parent_if_needed('/pride-and-prejudice/index.md')); + + $fs = $this->filesystem; + $this->assertTrue($fs->exists('/pride-and-prejudice')); + $this->assertTrue($fs->is_file('/pride-and-prejudice/index.md')); + $this->assertTrue($fs->is_file('/pride-and-prejudice/other.md')); + } + + public function test_flatten_parent_if_needed_acts_on_a_directory_path() { + $this->setup_directory_tree([ + 'pride-and-prejudice' => [ + 'index.md' => 'Test parent', + ], + ]); + + $sync = new WP_Static_File_Sync($this->filesystem); + $this->assertTrue($sync->flatten_parent_if_needed('/pride-and-prejudice')); + + $fs = $this->filesystem; + $this->assertFalse($fs->exists('/pride-and-prejudice')); + $this->assertTrue($fs->is_file('/pride-and-prejudice.md')); + } + + public function test_ensure_is_directory_index_creates_an_index_file_if_needed() { + $this->setup_directory_tree([ + 'pride-and-prejudice.md' => 'Pride and Prejudice', + ]); + + $sync = new WP_Static_File_Sync($this->filesystem); + $this->assertEquals( + '/pride-and-prejudice/index.md', + 
$sync->ensure_is_directory_index('/pride-and-prejudice.md') + ); + + $fs = $this->filesystem; + $this->assertFalse($fs->is_file('/pride-and-prejudice.md')); + $this->assertTrue($fs->is_file('/pride-and-prejudice/index.md')); + $this->assertEquals('Pride and Prejudice', $fs->read_file('/pride-and-prejudice/index.md')); + } + + public function test_ensure_is_directory_index_returns_the_index_file_if_it_already_exists() { + $this->setup_directory_tree([ + 'pride-and-prejudice' => [ + 'index.md' => 'Pride and Prejudice', + ], + ]); + + $sync = new WP_Static_File_Sync($this->filesystem); + $this->assertEquals('/pride-and-prejudice/index.md', $sync->ensure_is_directory_index('/pride-and-prejudice/index.md')); + + $fs = $this->filesystem; + $this->assertTrue($fs->is_file('/pride-and-prejudice/index.md')); + $this->assertEquals('Pride and Prejudice', $fs->read_file('/pride-and-prejudice/index.md')); + } + + private function setup_directory_tree($structure, $path_so_far = '/') { + $filesystem = $this->filesystem; + if($path_so_far === '/') { + if($filesystem->exists('/')) { + // Reset the root directory + if(false === $filesystem->rmdir('/', ['recursive' => true])) { + throw new Exception('Failed to remove directory /'); + } + } + if(false === $filesystem->mkdir('/')) { + throw new Exception('Failed to create directory /'); + } + } + foreach($structure as $name => $content) { + $path = rtrim($path_so_far, '/') . '/' . $name; + if(is_array($content)) { + if(false === $filesystem->mkdir($path)) { + throw new Exception('Failed to create directory ' . $path); + } + $this->setup_directory_tree($content, $path); + } else { + if(false === $filesystem->put_contents($path, $content)) { + throw new Exception('Failed to create file ' . 
$path); + } + } + } + } + +} diff --git a/packages/playground/data-liberation-static-files-editor/tests/bootstrap.php b/packages/playground/data-liberation-static-files-editor/tests/bootstrap.php new file mode 100644 index 0000000000..31f3371145 --- /dev/null +++ b/packages/playground/data-liberation-static-files-editor/tests/bootstrap.php @@ -0,0 +1,8 @@ +<?php + +error_reporting(E_ALL); +ini_set('display_errors', 1); + +require_once __DIR__ . '/../../data-liberation/bootstrap.php'; +require_once __DIR__ . '/../../data-liberation-markdown/src/bootstrap.php'; +require_once __DIR__ . '/../WP_Static_File_Sync.php'; diff --git a/packages/playground/data-liberation-static-files-editor/tests/static-files-tests/pride-and-prejudice/index.md b/packages/playground/data-liberation-static-files-editor/tests/static-files-tests/pride-and-prejudice/index.md new file mode 100644 index 0000000000..aab6f5a69a --- /dev/null +++ b/packages/playground/data-liberation-static-files-editor/tests/static-files-tests/pride-and-prejudice/index.md @@ -0,0 +1 @@ +Pride and Prejudice diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index 7627b23d01..802b07337f 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -180,3 +180,17 @@ function reset_mbstring_encoding() { mbstring_binary_safe_encoding( true ); } } + +if ( ! 
class_exists( 'WP_Error' ) ) { + class WP_Error { + public $code; + public $message; + public $data; + + public function __construct( $code, $message, $data = array() ) { + $this->code = $code; + $this->message = $message; + $this->data = $data; + } + } +} diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php index 7a578f9538..6259e88ad8 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Directory_Tree_Entity_Reader.php @@ -109,7 +109,7 @@ public function next_entity() { $this->parent_ids[ $missing_parent_id_depth ] = $this->emit_post_entity( array( 'content' => '', - 'source_path' => $missing_parent_path, + 'local_file_path' => $missing_parent_path, 'parent_id' => $this->parent_ids[ $missing_parent_id_depth - 1 ] ?? null, 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $missing_parent_path ) ), ) @@ -120,7 +120,7 @@ public function next_entity() { $this->parent_ids[ $depth ] = $this->emit_post_entity( array( 'content' => '', - 'source_path' => $dir, + 'local_file_path' => $dir, 'parent_id' => $parent_id, 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $dir ) ), ) @@ -132,7 +132,7 @@ public function next_entity() { $this->parent_ids[ $depth ] = $this->emit_post_entity( array( 'content' => $this->filesystem->read_file( $file_path ), - 'source_path' => $file_path, + 'local_file_path' => $file_path, 'parent_id' => $parent_id, 'title_fallback' => WP_Import_Utils::slug_to_title( basename( $file_path ) ), ) @@ -149,7 +149,7 @@ public function next_entity() { $this->emit_post_entity( array( 'content' => $this->filesystem->read_file( $file_path ), - 'source_path' => $file_path, + 'local_file_path' => $file_path, 'parent_id' => $parent_id, 'title_fallback' => WP_Import_Utils::slug_to_title( 
basename( $file_path ) ), ) @@ -197,7 +197,7 @@ protected function emit_post_entity( $options ) { $entity_data = array( 'post_id' => $this->next_post_id, 'post_type' => 'page', - 'guid' => $options['source_path'], + 'guid' => $options['local_file_path'], 'post_title' => $post_title, 'post_content' => $block_markup, 'post_excerpt' => $converter->get_meta_value( 'post_excerpt' ) ?? '', @@ -205,19 +205,19 @@ protected function emit_post_entity( $options ) { ); /** - * Technically `source_path` isn't a part of the WordPress post object, + * Technically `local_file_path` isn't a part of the WordPress post object, * but we need it to resolve relative URLs in the imported content. * * This path is relative to the root directory traversed by this class. */ - if ( ! empty( $options['source_path'] ) ) { - $source_path = $options['source_path']; + if ( ! empty( $options['local_file_path'] ) ) { + $local_file_path = $options['local_file_path']; $root_dir = $this->file_visitor->get_root_dir(); - if ( str_starts_with( $source_path, $root_dir ) ) { - $source_path = substr( $source_path, strlen( $root_dir ) ); + if ( str_starts_with( $local_file_path, $root_dir ) ) { + $local_file_path = substr( $local_file_path, strlen( $root_dir ) ); } - $source_path = ltrim( $source_path, '/' ); - $entity_data['source_path'] = $source_path; + $local_file_path = ltrim( $local_file_path, '/' ); + $entity_data['local_file_path'] = $local_file_path; } if ( $converter->get_meta_value( 'slug' ) ) { diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php index 668ea185a5..92f2521118 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_Entity_Reader.php @@ -6,14 +6,15 @@ class WP_Filesystem_Entity_Reader extends WP_Entity_Reader { private $post_tree; 
private $entities = array(); private $current_entity; + private $post_type; private $finished = false; - public function __construct( $filesystem, $root_dir = '/' ) { + public function __construct( $filesystem, $options = array() ) { $this->filesystem = $filesystem; + $this->post_type = $options['post_type'] ?? 'page'; $this->post_tree = WP_Filesystem_To_Post_Tree::create( $this->filesystem, array ( - 'root_dir' => $root_dir, 'first_post_id' => 2, 'filter_pattern' => '#\.(?:md|html|xhtml)$#', 'index_file_pattern' => '#^index\.[a-z]+$#', @@ -49,8 +50,8 @@ public function next_entity(): bool { $source_content_converter = null; $post_tree_node = $this->post_tree->get_current_node(); if($post_tree_node['type'] === 'file') { - $content = $this->filesystem->read_file($post_tree_node['source_path_absolute']); - $extension = pathinfo($post_tree_node['source_path_absolute'], PATHINFO_EXTENSION); + $content = $this->filesystem->read_file($post_tree_node['local_file_path']); + $extension = pathinfo($post_tree_node['local_file_path'], PATHINFO_EXTENSION); switch($extension) { case 'md': $converter = new WP_Markdown_To_Blocks( $content ); @@ -89,13 +90,13 @@ public function next_entity(): bool { $data = $entity->get_data(); if( $entity->get_type() === 'post' ) { $data['id'] = $post_tree_node['post_id']; - $data['guid'] = $post_tree_node['source_path_relative']; + $data['guid'] = $post_tree_node['local_file_path']; $data['post_parent'] = $post_tree_node['parent_id']; $data['post_title'] = $data['post_title'] ?? null; $data['post_status'] = 'publish'; - $data['post_type'] = 'page'; + $data['post_type'] = $this->post_type; if ( ! 
$data['post_title'] ) { - $data['post_title'] = WP_Import_Utils::slug_to_title( basename( $post_tree_node['source_path_relative'] ) ); + $data['post_title'] = WP_Import_Utils::slug_to_title( basename( $post_tree_node['local_file_path'] ) ); } $entity = new WP_Imported_Entity( $entity->get_type(), $data ); } @@ -104,7 +105,7 @@ public function next_entity(): bool { // Also emit: $additional_meta = array( - 'source_path_relative' => $post_tree_node['source_path_relative'], + 'local_file_path' => $post_tree_node['local_file_path'], 'source_type' => $post_tree_node['type'], 'source_content_converter' => $source_content_converter, ); diff --git a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php index a66cee982d..dde52c8671 100644 --- a/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php +++ b/packages/playground/data-liberation/src/entity-readers/WP_Filesystem_To_Post_Tree.php @@ -12,7 +12,6 @@ class WP_Filesystem_To_Post_Tree { private $parent_ids = array(); private $next_post_id; - private $root_dir; private $create_index_pages; private $entities_read_so_far = 0; private $filter_pattern = '##'; @@ -23,10 +22,6 @@ public static function create( \WordPress\Filesystem\WP_Abstract_Filesystem $filesystem, $options ) { - if ( ! isset( $options['root_dir'] ) ) { - _doing_it_wrong( __FUNCTION__, 'Missing required options: root_dir', '1.0.0' ); - return false; - } if ( ! 
isset( $options['first_post_id'] ) ) { _doing_it_wrong( __FUNCTION__, 'Missing required options: first_post_id', '1.0.0' ); return false; @@ -50,8 +45,7 @@ private function __construct( \WordPress\Filesystem\WP_Abstract_Filesystem $filesystem, $options ) { - $this->file_visitor = new WordPress\Filesystem\WP_Filesystem_Visitor( $filesystem, $options['root_dir'] ); - $this->root_dir = $options['root_dir']; + $this->file_visitor = new WordPress\Filesystem\WP_Filesystem_Visitor( $filesystem ); $this->create_index_pages = $options['create_index_pages'] ?? true; $this->next_post_id = $options['first_post_id']; $this->filter_pattern = $options['filter_pattern']; @@ -101,7 +95,7 @@ public function next_node() { $this->parent_ids[ $missing_parent_id_depth ] = $this->emit_object( array( 'type' => 'directory', - 'source_path_absolute' => $missing_parent_path, + 'local_file_path' => $missing_parent_path, 'parent_id' => $this->parent_ids[ $missing_parent_id_depth - 1 ] ?? null, ) ); @@ -111,7 +105,7 @@ public function next_node() { $this->parent_ids[ $depth ] = $this->emit_object( array( 'type' => 'file_placeholder', - 'source_path_absolute' => $dir, + 'local_file_path' => $dir, 'parent_id' => $parent_id, ) ); @@ -122,7 +116,7 @@ public function next_node() { $this->parent_ids[ $depth ] = $this->emit_object( array( 'type' => 'file', - 'source_path_absolute' => $file_path, + 'local_file_path' => $file_path, 'parent_id' => $parent_id, ) ); @@ -138,7 +132,7 @@ public function next_node() { $this->emit_object( array( 'type' => 'file', - 'source_path_absolute' => $file_path, + 'local_file_path' => $file_path, 'parent_id' => $parent_id, ) ); @@ -160,7 +154,6 @@ protected function emit_object( $options ) { $options, array( 'post_id' => $post_id, - 'source_path_relative' => substr( $options['source_path_absolute'], strlen( $this->root_dir ) ), ) ); ++$this->entities_read_so_far; @@ -182,7 +175,7 @@ private function next_file() { if ( $event->is_entering() ) { $abs_paths = array(); 
foreach ( $event->files as $filename ) { - $abs_paths[] = $event->dir . '/' . $filename; + $abs_paths[] = wp_join_paths( $event->dir, $filename ); } $this->pending_files = $this->choose_relevant_files( $abs_paths ); if ( ! count( $this->pending_files ) ) { diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php index e83e2aaa9e..55feb69709 100644 --- a/packages/playground/data-liberation/src/functions.php +++ b/packages/playground/data-liberation/src/functions.php @@ -289,3 +289,15 @@ function mb_str_split( $input, $split_length = 1, $encoding = null ) { return $result; } } + +function wp_join_paths() { + $paths = array(); + foreach ( func_get_args() as $arg ) { + if ( $arg !== '' ) { + $paths[] = $arg; + } + } + $path = implode('/', $paths); + + return preg_replace( '#/+#', '/', $path ); +} diff --git a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php index ec5a5a6d89..2a399b06a4 100644 --- a/packages/playground/data-liberation/src/import/WP_Entity_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Entity_Importer.php @@ -464,7 +464,6 @@ public function import_post( $data ) { $post_type = $data['post_type'] ?? 'post'; $post_type_object = get_post_type_object( $post_type ); - // Is this type even valid? if ( ! $post_type_object ) { $this->logger->warning( diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index 41a1b2c4c9..03e41536db 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -596,7 +596,7 @@ protected function frontload_next_entity() { $this->enqueue_attachment_download( $p->get_raw_url(), array( - 'context_path' => $post['source_path'] ?? $post['slug'] ?? 
null, + 'context_path' => $post['local_file_path'] ?? $post['slug'] ?? null, ) ); } @@ -660,7 +660,7 @@ protected function import_next_entity() { */ $asset_filename = $this->new_asset_filename( $p->get_raw_url(), - $data['source_path'] ?? $data['slug'] ?? null + $data['local_file_path'] ?? $data['slug'] ?? null ); if ( file_exists( $this->options['uploads_path'] . '/' . $asset_filename ) ) { $p->set_raw_url( diff --git a/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php b/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php index f8d65c0357..0dccac219e 100644 --- a/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php +++ b/packages/playground/data-liberation/tests/WPHTMLEntityReaderTests.php @@ -13,7 +13,7 @@ public function test_entity_reader() { <h1>It is our pleasure to announce that WordPress 6.8 was released</h1> <p>Last week, WordPress 6.8 was released.</p> HTML; - $reader = new WP_HTML_Entity_Reader( new WP_HTML_Processor( $html ), 1 ); + $reader = new WP_HTML_Entity_Reader( WP_HTML_Processor::create_fragment( $html ), 1 ); $entities = []; while ( $reader->next_entity() ) { $data = $reader->get_entity()->get_data(); @@ -64,11 +64,8 @@ public function test_entity_reader() { } private function normalize_markup( $markup ) { - $processor = new WP_HTML_Processor( $markup ); + $processor = WP_HTML_Processor::create_fragment( $markup ); $serialized = $processor->serialize(); - if(str_ends_with($serialized, "</body></html>")) { - $serialized = substr($serialized, 0, strlen("</body></html>")); - } return $serialized; } diff --git a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php index 83f03e8ed7..cf07907154 100644 --- a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php +++ b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php @@ -16,7 +16,7 @@ public function test_metadata_extraction() { 
<h1>WordPress 6.8 was released</h1> <p>Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library.</p> HTML; - $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $html ) ); + $converter = new WP_HTML_To_Blocks( WP_HTML_Processor::create_fragment( $html ) ); $converter->convert( $html ); $metadata = $converter->get_all_metadata(); $expected_metadata = [ @@ -132,7 +132,7 @@ public function provider_test_conversion() { public function test_html_to_blocks_excerpt() { $input = file_get_contents( __DIR__ . '/fixtures/html-to-blocks/excerpt.input.html' ); - $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $input ) ); + $converter = new WP_HTML_To_Blocks( WP_HTML_Processor::create_fragment( $input ) ); $converter->convert( $input ); $blocks = $converter->get_block_markup(); From 3c04e0f8009b4fb66a2d0e5bb74c2348d7020e84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com> Date: Sun, 22 Dec 2024 19:46:31 +0100 Subject: [PATCH 25/26] Document a bunch of todos --- .../WP_Static_File_Sync.php | 89 +++++++++++-------- .../plugin.php | 15 ++++ 2 files changed, 68 insertions(+), 36 deletions(-) diff --git a/packages/playground/data-liberation-static-files-editor/WP_Static_File_Sync.php b/packages/playground/data-liberation-static-files-editor/WP_Static_File_Sync.php index b4ebd5e696..89b80b9c48 100644 --- a/packages/playground/data-liberation-static-files-editor/WP_Static_File_Sync.php +++ b/packages/playground/data-liberation-static-files-editor/WP_Static_File_Sync.php @@ -214,16 +214,36 @@ public function on_delete_post(int $post_id): void return; } - $relative_path = get_post_meta($post_id, 'local_file_path', true); - if (! $relative_path ) { + $local_file_path = get_post_meta($post_id, 'local_file_path', true); + if (! $local_file_path ) { return; } - if (! 
$this->filesystem->exists($relative_path)) { + if (! $this->filesystem->exists($local_file_path)) { return; } - $this->delete_page($relative_path); + $has_children = !!get_posts([ + 'post_type' => $this->post_type, + 'post_parent' => $post_id, + 'numberposts' => 1, + 'fields' => 'ids' + ]); + + if($has_children) { + $path_to_delete = dirname($local_file_path); + $success = $this->filesystem->rmdir($path_to_delete, ['recursive' => true]); + } else { + $path_to_delete = $local_file_path; + $success = $this->filesystem->rm($path_to_delete); + } + + if(!$success) { + $this->bail('failed_to_delete_directory', 'Failed to delete local file: ' . $path_to_delete); + return; + } + + $this->flatten_parent_if_needed(dirname($path_to_delete)); } private function wordpress_ready_for_sync(): bool { @@ -252,34 +272,50 @@ private function convert_content( $page_id ) { $content_converter = get_post_meta($page_id, 'content_converter', true) ?: 'md'; + /** + * @TODO: Decide – should we only do one of the following + * instead of both? + * + * 1. Include the title as the first H1 block + * 2. Include the title as a metadata field + */ $title_block = ( WP_Import_Utils::block_opener('heading', array('level' => 1)) . '<h1>' . esc_html(get_the_title($page_id)) . '</h1>' . WP_Import_Utils::block_closer('heading') ); $block_markup = $title_block . $page->post_content; + $metadata = array( + 'title' => get_the_title($page_id), + ); switch($content_converter) { - case 'html': - case 'xhtml': - // @TODO: Implement a Blocks to HTML converter – OR just render - // the blocks. 
- break; + // case 'blocks': + // $converter = new WP_Blocks_To_Blocks( + // $block_markup, + // $metadata + // ); + // break; + // case 'html': + // case 'xhtml': + // $converter = new WP_Blocks_To_HTML( + // $block_markup, + // $metadata + // ); + // break; case 'md': default: $converter = new WP_Blocks_To_Markdown( $block_markup, - array( - 'title' => get_the_title($page_id), - ) + $metadata ); - if(false === $converter->convert()) { - // @TODO: error handling. - return; - } - return $converter->get_result(); break; } + if(false === $converter->convert()) { + // @TODO: error handling. + return; + } + return $converter->get_result(); } public function get_last_error() @@ -414,25 +450,6 @@ public function flatten_parent_if_needed(string $directory_index_path): bool return $new_path; } - /** - * Delete a file and remove its parent directory if it becomes empty. - */ - private function delete_page(string $path): bool - { - if (!$this->filesystem->is_file($path)) { - $this->bail('path_not_found', 'Path does not exist: ' . $path); - return false; - } - - // Delete the file - if(!$this->filesystem->rm($path)) { - $this->bail('failed_to_delete_file', 'Failed to delete file: ' . $path); - return false; - } - - return $this->flatten_parent_if_needed($path); - } - /** * Append a unique suffix to a file path to avoid collisions. */ diff --git a/packages/playground/data-liberation-static-files-editor/plugin.php b/packages/playground/data-liberation-static-files-editor/plugin.php index 32f260fc64..5c9ddd73f0 100644 --- a/packages/playground/data-liberation-static-files-editor/plugin.php +++ b/packages/playground/data-liberation-static-files-editor/plugin.php @@ -1,6 +1,21 @@ <?php /** * Plugin Name: Data Liberation – WordPress Static files editor + * + * @TODO: Page metadata editor in Gutenberg + * @TODO: A special "filename" field in wp-admin and in Gutenberg. Either source from the page title or + * pin it to a specific, user-defined value. 
+ * @TODO: Choose the local file storage format (MD, HTML, etc.) in Gutenberg page options. + * @TODO: HTML, XHTML, and Blocks renderers + * @TODO: Integrity check – is the database still in sync with the files? + * If not, what should we do? + * * Overwrite the database with the local files? This is a local files editor after all. + * * Display a warning in wp-admin and let the user decide what to do? + * @TODO: Consider tricky scenarios – moving a parent to trash and then restoring it. + * @TODO: Consider using hierarchical taxonomy to model the directory/file structure – instead of + * using the post_parent field. Could be more flexible (no need for index.md files) and require + * less complex operations in the code (no need to update a subtree of posts when moving a post, + * no need to periodically "flatten" the parent directory). */ use WordPress\Filesystem\WP_Filesystem; From cb20c92521426f8bc9dbe429e427a20bcf20c82e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com> Date: Sun, 22 Dec 2024 19:47:48 +0100 Subject: [PATCH 26/26] Adjust docstrings --- .../playground/data-liberation-static-files-editor/plugin.php | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/playground/data-liberation-static-files-editor/plugin.php b/packages/playground/data-liberation-static-files-editor/plugin.php index 5c9ddd73f0..a6f9826c10 100644 --- a/packages/playground/data-liberation-static-files-editor/plugin.php +++ b/packages/playground/data-liberation-static-files-editor/plugin.php @@ -16,6 +16,7 @@ * using the post_parent field. Could be more flexible (no need for index.md files) and require * less complex operations in the code (no need to update a subtree of posts when moving a post, * no need to periodically "flatten" the parent directory). + * @TODO: Maybe use Playground's FilePickerTree React component? Or re-implement it with interactivity API? */ use WordPress\Filesystem\WP_Filesystem;