diff --git a/packages/playground/data-liberation/src/git/WP_Git_Cached_Index.php b/packages/playground/data-liberation/src/git/WP_Git_Cached_Index.php index c62169a01a..7678c18652 100644 --- a/packages/playground/data-liberation/src/git/WP_Git_Cached_Index.php +++ b/packages/playground/data-liberation/src/git/WP_Git_Cached_Index.php @@ -8,8 +8,10 @@ class WP_Git_Cached_Index { private $oid; private $type; - private $length; - private $contents; + private $content_inflate_handle; + private $object_content_chunk; + private $called_next_object_chunk; + private $buffered_object_content; private $parsed_commit; private $parsed_tree; private $last_error; @@ -31,33 +33,142 @@ public function __construct( } } - /** - * @TODO: Streaming read. Don't load everything into memory. - */ public function read_object($oid) { - // Reset the object state - $this->oid = null; - $this->type = null; - $this->length = null; - $this->contents = null; - $this->parsed_commit = null; - $this->parsed_tree = null; + $this->reset(); + + $object_path = $this->get_object_path($oid); + if(!$this->fs->is_file($object_path)) { + return false; + } - $contents = $this->fs->read_file($this->get_object_path($oid)); - $contents = WP_Git_Pack_Processor::inflate($contents); - $type_length = strpos($contents, ' '); $this->oid = $oid; - $this->type = substr($contents, 0, $type_length); - $this->length = substr($contents, $type_length + 1, strpos($contents, "\x00", $type_length) - $type_length - 1); - $this->contents = substr($contents, strpos($contents, "\x00", $type_length) + 1); - if($this->type === WP_Git_Pack_Processor::OBJECT_NAMES[WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT]) { - $this->parsed_commit = WP_Git_Pack_Processor::parse_commit_message($this->contents); - } else if($this->type === WP_Git_Pack_Processor::OBJECT_NAMES[WP_Git_Pack_Processor::OBJECT_TYPE_TREE]) { - $this->parsed_tree = WP_Git_Pack_Processor::parse_tree_bytes($this->contents); + if(!$this->open_object_stream()) { + return false; + } 
+ + // Read the object header and initialize the internal state + // for the specific get_* methods below. + $header = false; + $content = ''; + while($this->next_object_chunk()) { + $content .= $this->get_object_content_chunk(); + $null_byte_position = strpos($content, "\x00"); + if($null_byte_position === false) { + continue; + } + $header = substr($content, 0, $null_byte_position); + break; + } + + if(false === $header) { + $this->last_error = 'Failed to read the object header'; + return false; + } + + $this->object_content_chunk = substr($content, strlen($header) + 1); + + // Parse the header + $type_length = strpos($header, ' '); + $type = substr($header, 0, $type_length); + switch($type) { + case WP_Git_Pack_Processor::OBJECT_NAMES[WP_Git_Pack_Processor::OBJECT_TYPE_BLOB]: + $this->type = WP_Git_Pack_Processor::OBJECT_TYPE_BLOB; + break; + case WP_Git_Pack_Processor::OBJECT_NAMES[WP_Git_Pack_Processor::OBJECT_TYPE_TREE]: + $this->type = WP_Git_Pack_Processor::OBJECT_TYPE_TREE; + break; + case WP_Git_Pack_Processor::OBJECT_NAMES[WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT]: + $this->type = WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT; + break; + default: + $this->last_error = 'Invalid object type: ' . 
$type; + return false; } return true; } + public function get_type() { + return $this->type; + } + + public function get_length() { + return $this->fs->get_streamed_file_length(); + } + + private function open_object_stream() { + $this->content_inflate_handle = inflate_init(ZLIB_ENCODING_DEFLATE); + if(!$this->content_inflate_handle) { + $this->last_error = 'Failed to initialize inflate handle'; + return false; + } + if(!$this->fs->open_file_stream($this->get_object_path($this->oid))) { + return false; + } + return true; + } + + public function next_object_chunk() { + if(false === $this->fs->next_file_chunk()) { + $this->last_error = $this->fs->get_error_message(); + return false; + } + $this->called_next_object_chunk = true; + $chunk = $this->fs->get_file_chunk(); + $next_chunk = inflate_add($this->content_inflate_handle, $chunk); + if(false === $next_chunk) { + $this->last_error = 'Failed to inflate chunk'; + $this->close_object_stream(); + return false; + } + $this->object_content_chunk = $next_chunk; + return true; + } + + public function get_object_content_chunk() { + return $this->object_content_chunk; + } + + private function close_object_stream() { + $this->fs->close_file_stream(); + $this->content_inflate_handle = null; + return true; + } + + public function get_parsed_commit() { + if(null === $this->parsed_commit) { + $commit_contents = $this->read_entire_object_contents(); + $this->parsed_commit = WP_Git_Pack_Processor::parse_commit_message($commit_contents); + } + return $this->parsed_commit; + } + + public function get_parsed_tree() { + if(null === $this->parsed_tree) { + $tree_contents = $this->read_entire_object_contents(); + $this->parsed_tree = WP_Git_Pack_Processor::parse_tree_bytes($tree_contents); + } + return $this->parsed_tree; + } + + public function read_entire_object_contents() { + // If we've advanced the stream, we can't reuse it to read the entire + // object anymore. Let's re-initialize the stream. 
+ if($this->called_next_object_chunk) { + $this->read_object($this->oid); + } + if(null !== $this->buffered_object_content) { + return $this->buffered_object_content; + } + // Load the entire object into memory and keep the result + // for later use. We'll likely need it again before we're + // done with the current object. + $this->buffered_object_content = $this->object_content_chunk; + while($this->next_object_chunk()) { + $this->buffered_object_content .= $this->get_object_content_chunk(); + } + return $this->buffered_object_content; + } + public function oid_exists($oid) { return $this->fs->is_file($this->get_object_path($oid)); } @@ -68,7 +179,10 @@ public function read_by_path($path, $root_tree_oid=null) { if(false === $this->read_object($head_oid)) { return false; } - $root_tree_oid = $this->get_commit_tree_oid(); + $root_tree_oid = $this->get_parsed_commit()['tree'] ?? null; + } + if($root_tree_oid === null) { + return false; } if(false === $this->read_object($root_tree_oid)) { return false; @@ -93,41 +207,90 @@ public function read_by_path($path, $root_tree_oid=null) { return true; } - public function get_descendants($tree_oid) { - if(false === $this->read_object($tree_oid)) { - return []; + public function get_last_error() { + return $this->last_error; + } + + public function iterate_objects_added_in($new_tree_oid, $old_tree_oid=null) { + if($new_tree_oid === $old_tree_oid) { + return false; } - foreach ($this->parsed_tree as $object) { - if ($object['mode'] === WP_Git_Pack_Processor::FILE_MODE_DIRECTORY) { - yield from $this->get_descendants($object['sha1']); - } else { - yield $object; - } + + // Resolve the actual tree oid if $new_tree_oid is a commit + if(false === $this->read_object($new_tree_oid)) { + $this->last_error = 'Failed to read object: ' . 
$new_tree_oid; + return false; + } + if($this->get_type() === WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) { + // yield the commit's root tree oid (the commit oid itself is not yielded) + $parsed_commit = $this->get_parsed_commit(); + yield $parsed_commit['tree']; + $new_tree_oid = $parsed_commit['tree']; } - } - public function get_type() { - return $this->type; - } + // Resolve the actual tree oid if $old_tree_oid is a commit + if($old_tree_oid) { + if(false === $this->read_object($old_tree_oid)) { + $this->last_error = 'Failed to read object: ' . $old_tree_oid; + return false; + } + if($this->get_type() === WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) { + $old_tree_oid = $this->get_parsed_commit()['tree']; + } + } - public function get_length() { - return $this->length; - } + $stack = [[$new_tree_oid, $old_tree_oid]]; + + while(!empty($stack)) { + list($current_new_oid, $current_old_oid) = array_pop($stack); + + if(false === $this->read_object($current_new_oid)) { + $this->last_error = 'Failed to read object: ' . $current_new_oid; + return false; + } + $new_tree = $this->get_parsed_tree(); + + $old_tree = []; + if($current_old_oid) { + if(false === $this->read_object($current_old_oid)) { + $this->last_error = 'Failed to read object: ' . $current_old_oid; + return false; + } + $old_tree = $this->get_parsed_tree(); + } - public function get_contents() { - return $this->contents; - } + foreach($new_tree as $name => $object) { + // Object is new + if(!isset($old_tree[$name])) { + if(false === $this->read_object($object['sha1'])) { + $this->last_error = 'Failed to read object: ' . 
$object['sha1']; + return false; + } + yield $object['sha1']; + if($object['mode'] === WP_Git_Pack_Processor::FILE_MODE_DIRECTORY) { + $stack[] = [$object['sha1'], null]; + } + continue; + } - public function get_parsed_commit() { - return $this->parsed_commit; - } + // Object is unchanged + if($object['sha1'] === $old_tree[$name]['sha1']) { + continue; + } - public function get_commit_tree_oid() { - return $this->parsed_commit['tree']; - } + if(false === $this->read_object($object['sha1'])) { + $this->last_error = 'Failed to read object: ' . $object['sha1']; + return false; + } + + yield $object['sha1']; - public function get_parsed_tree() { - return $this->parsed_tree; + if($object['mode'] === WP_Git_Pack_Processor::FILE_MODE_DIRECTORY) { + // Object is a changed directory - add to stack for recursive processing + $stack[] = [$object['sha1'], $old_tree[$name]['sha1']]; + } + } + } } public function set_ref_head($ref, $oid) { @@ -205,6 +368,7 @@ public function commit($changeset, $commit_meta=[]) { // Process blob updates foreach ($updates as $path => $content) { + $path = '/' . ltrim($path, '/'); $blob_oid = $this->add_object(WP_Git_Pack_Processor::OBJECT_TYPE_BLOB, $content); $this->mark_tree_path_changed($changed_trees, dirname($path)); $changed_trees[dirname($path)]['entries'][basename($path)] = [ @@ -216,6 +380,7 @@ public function commit($changeset, $commit_meta=[]) { // Process deletes foreach ($deletes as $path) { + $path = '/' . ltrim($path, '/'); if (!$this->read_by_path(dirname($path))) { _doing_it_wrong(__METHOD__, 'File not found in HEAD: ' . $path, '1.0.0'); return false; @@ -226,6 +391,8 @@ public function commit($changeset, $commit_meta=[]) { // Process tree moves foreach ($move_trees as $old_path => $new_path) { + $old_path = '/' . ltrim($old_path, '/'); + $new_path = '/' . ltrim($new_path, '/'); if (!$this->read_by_path($old_path)) { _doing_it_wrong(__METHOD__, 'Path not found in HEAD: ' . 
$old_path, '1.0.0'); return false; @@ -244,7 +411,7 @@ public function commit($changeset, $commit_meta=[]) { // Process trees bottom-up recursively $root_tree_oid = $this->commit_tree('/', $changed_trees); - // Create commit object + // Create a new commit object $commit_message = []; $commit_message[] = "tree " . $root_tree_oid; if($this->get_ref_head('HEAD')) { @@ -262,9 +429,22 @@ public function commit($changeset, $commit_meta=[]) { $this->last_error = 'Failed to set HEAD'; return false; } + $this->reset(); return $commit_oid; } + private function reset() { + $this->close_object_stream(); + $this->oid = null; + $this->type = null; + $this->parsed_commit = null; + $this->parsed_tree = null; + $this->called_next_object_chunk = false; + $this->buffered_object_content = null; + $this->object_content_chunk = null; + $this->last_error = null; + } + private function mark_tree_path_changed(&$changed_trees, $path) { while ($path !== '/') { if (!isset($changed_trees[$path])) { @@ -312,4 +492,5 @@ private function commit_tree($path, $changed_trees) { ); } + } diff --git a/packages/playground/data-liberation/src/git/WP_Git_Pack_Processor.php b/packages/playground/data-liberation/src/git/WP_Git_Pack_Processor.php index d2ab5ba0ac..5d61512f3b 100644 --- a/packages/playground/data-liberation/src/git/WP_Git_Pack_Processor.php +++ b/packages/playground/data-liberation/src/git/WP_Git_Pack_Processor.php @@ -269,7 +269,7 @@ static public function parse_commit_message($commit_message) { $parsed = []; foreach($lines as $k => $line) { if(!trim($line)) { - $parsed['message'] = array_slice($lines, $k + 1); + $parsed['message'] = implode("\n", array_slice($lines, $k + 1)); break; } $type_len = strpos($line, ' ');