From 8a75f184da0813e8bb9fc3f8b395ef139b0a6527 Mon Sep 17 00:00:00 2001 From: Vincent Langlet Date: Tue, 10 Dec 2024 21:14:08 +0100 Subject: [PATCH] Add support for inline comment --- src/Token/Token.php | 6 ++ src/Token/Tokenizer.php | 56 +++++++++++++++---- .../Fixtures/ignored_violations.twig | 4 ++ tests/Token/Tokenizer/Fixtures/invalid5.twig | 2 +- tests/Token/Tokenizer/Fixtures/invalid6.twig | 1 - tests/Token/Tokenizer/Fixtures/test17.twig | 4 ++ tests/Token/Tokenizer/TokenizerTest.php | 25 ++++++++- 7 files changed, 83 insertions(+), 15 deletions(-) delete mode 100644 tests/Token/Tokenizer/Fixtures/invalid6.twig create mode 100644 tests/Token/Tokenizer/Fixtures/test17.twig diff --git a/src/Token/Token.php b/src/Token/Token.php index 15be5432..46a92ec8 100644 --- a/src/Token/Token.php +++ b/src/Token/Token.php @@ -44,15 +44,21 @@ final class Token public const COMMENT_EOL_TYPE = 'COMMENT_EOL_TYPE'; public const COMMENT_END_TYPE = 'COMMENT_END_TYPE'; public const NAMED_ARGUMENT_SEPARATOR_TYPE = 'NAMED_ARGUMENT_SEPARATOR_TYPE'; + public const INLINE_COMMENT_START_TYPE = 'INLINE_COMMENT_START_TYPE'; + public const INLINE_COMMENT_TEXT_TYPE = 'INLINE_COMMENT_TEXT_TYPE'; + public const INLINE_COMMENT_WHITESPACE_TYPE = 'INLINE_COMMENT_WHITESPACE_TYPE'; + public const INLINE_COMMENT_TAB_TYPE = 'INLINE_COMMENT_TAB_TYPE'; public const WHITESPACE_TOKENS = [ self::WHITESPACE_TYPE => self::WHITESPACE_TYPE, self::COMMENT_WHITESPACE_TYPE => self::COMMENT_WHITESPACE_TYPE, + self::INLINE_COMMENT_WHITESPACE_TYPE => self::INLINE_COMMENT_WHITESPACE_TYPE, ]; public const TAB_TOKENS = [ self::TAB_TYPE => self::TAB_TYPE, self::COMMENT_TAB_TYPE => self::COMMENT_TAB_TYPE, + self::INLINE_COMMENT_TAB_TYPE => self::INLINE_COMMENT_TAB_TYPE, ]; public const INDENT_TOKENS = self::WHITESPACE_TOKENS + self::TAB_TOKENS; diff --git a/src/Token/Tokenizer.php b/src/Token/Tokenizer.php index 4801dc48..c36a915a 100644 --- a/src/Token/Tokenizer.php +++ b/src/Token/Tokenizer.php @@ -21,6 +21,7 @@ final class Tokenizer implements TokenizerInterface private const STATE_DQ_STRING = 3; private const STATE_INTERPOLATION = 4; private const STATE_COMMENT = 5; + private const STATE_INLINE_COMMENT = 6; public const NAME_PATTERN = '[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*'; public const NUMBER_PATTERN = '[0-9]+(?:\.[0-9]+)?([Ee][+\-][0-9]+)?'; @@ -66,7 +67,7 @@ final class Tokenizer implements TokenizerInterface private array $expressionStarters = []; /** - * @var array, array}> + * @var array, array}> */ private array $state = []; @@ -130,6 +131,9 @@ public function tokenize(Source $source): Tokens case self::STATE_COMMENT: $this->lexComment(); break; + case self::STATE_INLINE_COMMENT: + $this->lexInlineComment(); + break; } if ( @@ -195,7 +199,7 @@ private function isInTernary(): bool } /** - * @return int<0, 5> + * @return int<0, 6> */ private function getState(): int { @@ -205,7 +209,7 @@ private function getState(): int } /** - * @param int<0, 5> $state + * @param int<0, 6> $state */ private function pushState(int $state): void { @@ -346,6 +350,8 @@ private function lexExpression(): void $this->lexString($match[0]); } elseif (1 === preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) { $this->lexStartDqString(); + } elseif ('#' === $currentCode) { + $this->lexStartInlineComment(); } else { throw CannotTokenizeException::unexpectedCharacter($currentCode, $this->line); } @@ -401,7 +407,7 @@ private function lexComment(): void $this->popState(); } else { if (!$this->hasStateParam('ignoredViolations')) { - $comment = substr($this->code, $this->cursor, $match[0][1]); + $comment = substr($this->code, $this->cursor, $match[0][1] - $this->cursor); $this->extractIgnoredViolations($comment); } @@ -410,6 +416,19 @@ private function lexComment(): void } } + private function lexInlineComment(): void + { + if (!$this->hasStateParam('ignoredViolations')) { + preg_match('/(\r\n|\r|\n)/', $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor); + $comment = substr($this->code, $this->cursor, isset($match[0]) ? $match[0][1] - $this->cursor : null); + + $this->extractIgnoredViolations($comment); + $this->processIgnoredViolations(); + } + + $this->lexData(); + } + private function lexDqString(): void { if (1 === preg_match(self::REGEX_INTERPOLATION_START, $this->code, $match, 0, $this->cursor)) { @@ -465,14 +484,16 @@ private function lexData(int $limit = 0): void } elseif (1 === preg_match('/\S+/', $this->code, $match, 0, $this->cursor)) { $value = $match[0]; - // Stop if cursor reaches the next expression starter. - if (0 !== $limit) { - $value = substr($value, 0, $limit - $this->cursor); - } - if (self::STATE_COMMENT === $this->getState()) { $this->pushToken(Token::COMMENT_TEXT_TYPE, $value); + } elseif (self::STATE_INLINE_COMMENT === $this->getState()) { + $this->pushToken(Token::INLINE_COMMENT_TEXT_TYPE, $value); } else { + // Stop if cursor reaches the next expression starter. + if (0 !== $limit) { + $value = substr($value, 0, $limit - $this->cursor); + } + $this->pushToken(Token::TEXT_TYPE, $value); } } else { @@ -514,6 +535,12 @@ private function lexStart(): void $this->pushState($state); } + private function lexStartInlineComment(): void + { + $this->pushToken(Token::INLINE_COMMENT_START_TYPE, '#'); + $this->pushState(self::STATE_INLINE_COMMENT); + } + private function lexStartDqString(): void { $token = $this->pushToken(Token::DQ_STRING_START_TYPE, '"'); @@ -539,6 +566,8 @@ private function lexTab(): void if (self::STATE_COMMENT === $this->getState()) { $this->pushToken(Token::COMMENT_TAB_TYPE, $whitespace); + } elseif (self::STATE_INLINE_COMMENT === $this->getState()) { + $this->pushToken(Token::INLINE_COMMENT_TAB_TYPE, $whitespace); } else { $this->pushToken(Token::TAB_TYPE, $whitespace); } @@ -555,6 +584,8 @@ private function lexWhitespace(): void if (self::STATE_COMMENT === $this->getState()) { $this->pushToken(Token::COMMENT_WHITESPACE_TYPE, $whitespace); + } elseif (self::STATE_INLINE_COMMENT === $this->getState()) { + $this->pushToken(Token::INLINE_COMMENT_WHITESPACE_TYPE, $whitespace); } else { $this->pushToken(Token::WHITESPACE_TYPE, $whitespace); } @@ -564,6 +595,9 @@ private function lexEOL(string $eol): void { if (self::STATE_COMMENT === $this->getState()) { $this->pushToken(Token::COMMENT_EOL_TYPE, $eol); + } elseif (self::STATE_INLINE_COMMENT === $this->getState()) { + $this->pushToken(Token::EOL_TYPE, $eol); + $this->popState(); } else { $this->pushToken(Token::EOL_TYPE, $eol); } @@ -774,8 +808,8 @@ private function getOperatorRegex(Environment $env): string private function extractIgnoredViolations(string $comment): void { $comment = trim($comment); - if (1 === preg_match('/^twig-cs-fixer-disable(|-line|-next-line)\s+([\s\w,.:]*)/i', $comment, $match)) { - $this->setStateParam('ignoredViolations', preg_replace('/\s+/', ',', $match[2]) ?? ''); + if (1 === preg_match('/^twig-cs-fixer-disable(|-line|-next-line)(?:$|\s+([\s\w,.:]*))/i', $comment, $match)) { + $this->setStateParam('ignoredViolations', preg_replace('/\s+/', ',', $match[2] ?? '') ?? ''); $this->setStateParam('ignoredType', trim($match[1], '-')); } else { $this->setStateParam('ignoredViolations', null); diff --git a/tests/Token/Tokenizer/Fixtures/ignored_violations.twig b/tests/Token/Tokenizer/Fixtures/ignored_violations.twig index 858ef03a..01762284 100644 --- a/tests/Token/Tokenizer/Fixtures/ignored_violations.twig +++ b/tests/Token/Tokenizer/Fixtures/ignored_violations.twig @@ -3,3 +3,7 @@ {# twig-cs-fixer-disable-line Foo.Bar #} {# twig-cs-fixer-disable-next-line Foo.Bar Bar.Foo #} {# twig-cs-fixer-disable-next-line #} + +# twig-cs-fixer-disable-next-line +{{ # twig-cs-fixer-disable-next-line +}} diff --git a/tests/Token/Tokenizer/Fixtures/invalid5.twig b/tests/Token/Tokenizer/Fixtures/invalid5.twig index bd96ad81..bbbd6d54 100644 --- a/tests/Token/Tokenizer/Fixtures/invalid5.twig +++ b/tests/Token/Tokenizer/Fixtures/invalid5.twig @@ -1 +1 @@ -{{ "#{p.first #{}}" }} +{# foo diff --git a/tests/Token/Tokenizer/Fixtures/invalid6.twig b/tests/Token/Tokenizer/Fixtures/invalid6.twig deleted file mode 100644 index bbbd6d54..00000000 --- a/tests/Token/Tokenizer/Fixtures/invalid6.twig +++ /dev/null @@ -1 +0,0 @@ -{# foo diff --git a/tests/Token/Tokenizer/Fixtures/test17.twig b/tests/Token/Tokenizer/Fixtures/test17.twig new file mode 100644 index 00000000..1e97e8e0 --- /dev/null +++ b/tests/Token/Tokenizer/Fixtures/test17.twig @@ -0,0 +1,4 @@ +{{ + # Inline comment + foo +}} diff --git a/tests/Token/Tokenizer/TokenizerTest.php b/tests/Token/Tokenizer/TokenizerTest.php index e6709790..c69a9d93 100644 --- a/tests/Token/Tokenizer/TokenizerTest.php +++ b/tests/Token/Tokenizer/TokenizerTest.php @@ -116,6 +116,7 @@ public function testTokenizeIgnoredViolations(): void 'Foo.Bar:5', 'Bar.Foo:5', ':6', + ':9', ], array_map( static fn (ViolationId $validationId) => $validationId->toString(), @@ -814,6 +815,27 @@ public static function tokenizeDataProvider(): iterable 19 => Token::EOF_TYPE, ], ]; + + yield [ + __DIR__.'/Fixtures/test17.twig', + [ + 0 => Token::VAR_START_TYPE, + 1 => Token::EOL_TYPE, + 2 => Token::WHITESPACE_TYPE, + 3 => Token::INLINE_COMMENT_START_TYPE, + 4 => Token::INLINE_COMMENT_WHITESPACE_TYPE, + 5 => Token::INLINE_COMMENT_TEXT_TYPE, + 6 => Token::INLINE_COMMENT_WHITESPACE_TYPE, + 7 => Token::INLINE_COMMENT_TEXT_TYPE, + 8 => Token::EOL_TYPE, + 9 => Token::WHITESPACE_TYPE, + 10 => Token::NAME_TYPE, + 11 => Token::EOL_TYPE, + 12 => Token::VAR_END_TYPE, + 13 => Token::EOL_TYPE, + 14 => Token::EOF_TYPE, + ], + ]; } /** @@ -842,7 +864,6 @@ public static function tokenizeInvalidDataProvider(): iterable yield [__DIR__.'/Fixtures/invalid2.twig', 'Unexpected character "&" at line 4.']; yield [__DIR__.'/Fixtures/invalid3.twig', 'Unclosed "(" at line 1.']; yield [__DIR__.'/Fixtures/invalid4.twig', 'Unexpected character ")" at line 1.']; - yield [__DIR__.'/Fixtures/invalid5.twig', 'Unexpected character "#" at line 1.']; - yield [__DIR__.'/Fixtures/invalid6.twig', 'Unclosed comment at line 1.']; + yield [__DIR__.'/Fixtures/invalid5.twig', 'Unclosed comment at line 1.']; } }