From 8a75f184da0813e8bb9fc3f8b395ef139b0a6527 Mon Sep 17 00:00:00 2001
From: Vincent Langlet <vincentlanglet@hotmail.fr>
Date: Tue, 10 Dec 2024 21:14:08 +0100
Subject: [PATCH] Add support for inline comments

---
 src/Token/Token.php                           |  6 ++
 src/Token/Tokenizer.php                       | 56 +++++++++++++++----
 .../Fixtures/ignored_violations.twig          |  4 ++
 tests/Token/Tokenizer/Fixtures/invalid5.twig  |  2 +-
 tests/Token/Tokenizer/Fixtures/invalid6.twig  |  1 -
 tests/Token/Tokenizer/Fixtures/test17.twig    |  4 ++
 tests/Token/Tokenizer/TokenizerTest.php       | 25 ++++++++-
 7 files changed, 83 insertions(+), 15 deletions(-)
 delete mode 100644 tests/Token/Tokenizer/Fixtures/invalid6.twig
 create mode 100644 tests/Token/Tokenizer/Fixtures/test17.twig

diff --git a/src/Token/Token.php b/src/Token/Token.php
index 15be5432..46a92ec8 100644
--- a/src/Token/Token.php
+++ b/src/Token/Token.php
@@ -44,15 +44,21 @@ final class Token
     public const COMMENT_EOL_TYPE = 'COMMENT_EOL_TYPE';
     public const COMMENT_END_TYPE = 'COMMENT_END_TYPE';
     public const NAMED_ARGUMENT_SEPARATOR_TYPE = 'NAMED_ARGUMENT_SEPARATOR_TYPE';
+    public const INLINE_COMMENT_START_TYPE = 'INLINE_COMMENT_START_TYPE';
+    public const INLINE_COMMENT_TEXT_TYPE = 'INLINE_COMMENT_TEXT_TYPE';
+    public const INLINE_COMMENT_WHITESPACE_TYPE = 'INLINE_COMMENT_WHITESPACE_TYPE';
+    public const INLINE_COMMENT_TAB_TYPE = 'INLINE_COMMENT_TAB_TYPE';
 
     public const WHITESPACE_TOKENS = [
         self::WHITESPACE_TYPE => self::WHITESPACE_TYPE,
         self::COMMENT_WHITESPACE_TYPE => self::COMMENT_WHITESPACE_TYPE,
+        self::INLINE_COMMENT_WHITESPACE_TYPE => self::INLINE_COMMENT_WHITESPACE_TYPE,
     ];
 
     public const TAB_TOKENS = [
         self::TAB_TYPE => self::TAB_TYPE,
         self::COMMENT_TAB_TYPE => self::COMMENT_TAB_TYPE,
+        self::INLINE_COMMENT_TAB_TYPE => self::INLINE_COMMENT_TAB_TYPE,
     ];
 
     public const INDENT_TOKENS = self::WHITESPACE_TOKENS + self::TAB_TOKENS;
diff --git a/src/Token/Tokenizer.php b/src/Token/Tokenizer.php
index 4801dc48..c36a915a 100644
--- a/src/Token/Tokenizer.php
+++ b/src/Token/Tokenizer.php
@@ -21,6 +21,7 @@ final class Tokenizer implements TokenizerInterface
     private const STATE_DQ_STRING = 3;
     private const STATE_INTERPOLATION = 4;
     private const STATE_COMMENT = 5;
+    private const STATE_INLINE_COMMENT = 6;
 
     public const NAME_PATTERN = '[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*';
     public const NUMBER_PATTERN = '[0-9]+(?:\.[0-9]+)?([Ee][+\-][0-9]+)?';
@@ -66,7 +67,7 @@ final class Tokenizer implements TokenizerInterface
     private array $expressionStarters = [];
 
     /**
-     * @var array<array{int<0, 5>, array<string, string|null>}>
+     * @var array<array{int<0, 6>, array<string, string|null>}>
      */
     private array $state = [];
 
@@ -130,6 +131,9 @@ public function tokenize(Source $source): Tokens
                 case self::STATE_COMMENT:
                     $this->lexComment();
                     break;
+                case self::STATE_INLINE_COMMENT:
+                    $this->lexInlineComment();
+                    break;
             }
 
             if (
@@ -195,7 +199,7 @@ private function isInTernary(): bool
     }
 
     /**
-     * @return int<0, 5>
+     * @return int<0, 6>
      */
     private function getState(): int
     {
@@ -205,7 +209,7 @@ private function getState(): int
     }
 
     /**
-     * @param int<0, 5> $state
+     * @param int<0, 6> $state
      */
     private function pushState(int $state): void
     {
@@ -346,6 +350,8 @@ private function lexExpression(): void
             $this->lexString($match[0]);
         } elseif (1 === preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
             $this->lexStartDqString();
+        } elseif ('#' === $currentCode) {
+            $this->lexStartInlineComment();
         } else {
             throw CannotTokenizeException::unexpectedCharacter($currentCode, $this->line);
         }
@@ -401,7 +407,7 @@ private function lexComment(): void
             $this->popState();
         } else {
             if (!$this->hasStateParam('ignoredViolations')) {
-                $comment = substr($this->code, $this->cursor, $match[0][1]);
+                $comment = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
                 $this->extractIgnoredViolations($comment);
             }
 
@@ -410,6 +416,19 @@ private function lexComment(): void
         }
     }
 
+    private function lexInlineComment(): void
+    {
+        // Skip the directive lookup once the 'ignoredViolations' state param is present.
+        if (!$this->hasStateParam('ignoredViolations')) {
+            // Candidate text: from the cursor up to the next line break, or to the end of the code if none.
+            $hasEol = 1 === preg_match('/(\r\n|\r|\n)/', $this->code, $eol, \PREG_OFFSET_CAPTURE, $this->cursor);
+            $length = $hasEol ? $eol[0][1] - $this->cursor : null;
+            $this->extractIgnoredViolations(substr($this->code, $this->cursor, $length));
+            $this->processIgnoredViolations();
+        }
+        $this->lexData(); // The tokens themselves are produced by lexData().
+    }
+
     private function lexDqString(): void
     {
         if (1 === preg_match(self::REGEX_INTERPOLATION_START, $this->code, $match, 0, $this->cursor)) {
@@ -465,14 +484,16 @@ private function lexData(int $limit = 0): void
         } elseif (1 === preg_match('/\S+/', $this->code, $match, 0, $this->cursor)) {
             $value = $match[0];
 
-            // Stop if cursor reaches the next expression starter.
-            if (0 !== $limit) {
-                $value = substr($value, 0, $limit - $this->cursor);
-            }
-
             if (self::STATE_COMMENT === $this->getState()) {
                 $this->pushToken(Token::COMMENT_TEXT_TYPE, $value);
+            } elseif (self::STATE_INLINE_COMMENT === $this->getState()) {
+                $this->pushToken(Token::INLINE_COMMENT_TEXT_TYPE, $value);
             } else {
+                // Stop if cursor reaches the next expression starter.
+                if (0 !== $limit) {
+                    $value = substr($value, 0, $limit - $this->cursor);
+                }
+
                 $this->pushToken(Token::TEXT_TYPE, $value);
             }
         } else {
@@ -514,6 +535,12 @@ private function lexStart(): void
         $this->pushState($state);
     }
 
+    private function lexStartInlineComment(): void
+    {
+        $this->pushToken(Token::INLINE_COMMENT_START_TYPE, '#'); // Emit the "#" that opens the inline comment.
+        $this->pushState(self::STATE_INLINE_COMMENT); // tokenize() now dispatches to lexInlineComment().
+    }
+
     private function lexStartDqString(): void
     {
         $token = $this->pushToken(Token::DQ_STRING_START_TYPE, '"');
@@ -539,6 +566,8 @@ private function lexTab(): void
 
         if (self::STATE_COMMENT === $this->getState()) {
             $this->pushToken(Token::COMMENT_TAB_TYPE, $whitespace);
+        } elseif (self::STATE_INLINE_COMMENT === $this->getState()) {
+            $this->pushToken(Token::INLINE_COMMENT_TAB_TYPE, $whitespace);
         } else {
             $this->pushToken(Token::TAB_TYPE, $whitespace);
         }
@@ -555,6 +584,8 @@ private function lexWhitespace(): void
 
         if (self::STATE_COMMENT === $this->getState()) {
             $this->pushToken(Token::COMMENT_WHITESPACE_TYPE, $whitespace);
+        } elseif (self::STATE_INLINE_COMMENT === $this->getState()) {
+            $this->pushToken(Token::INLINE_COMMENT_WHITESPACE_TYPE, $whitespace);
         } else {
             $this->pushToken(Token::WHITESPACE_TYPE, $whitespace);
         }
@@ -564,6 +595,9 @@ private function lexEOL(string $eol): void
     {
         if (self::STATE_COMMENT === $this->getState()) {
             $this->pushToken(Token::COMMENT_EOL_TYPE, $eol);
+        } elseif (self::STATE_INLINE_COMMENT === $this->getState()) {
+            $this->pushToken(Token::EOL_TYPE, $eol);
+            $this->popState();
         } else {
             $this->pushToken(Token::EOL_TYPE, $eol);
         }
@@ -774,8 +808,8 @@ private function getOperatorRegex(Environment $env): string
     private function extractIgnoredViolations(string $comment): void
     {
         $comment = trim($comment);
-        if (1 === preg_match('/^twig-cs-fixer-disable(|-line|-next-line)\s+([\s\w,.:]*)/i', $comment, $match)) {
-            $this->setStateParam('ignoredViolations', preg_replace('/\s+/', ',', $match[2]) ?? '');
+        if (1 === preg_match('/^twig-cs-fixer-disable(|-line|-next-line)(?:$|\s+([\s\w,.:]*))/i', $comment, $match)) {
+            $this->setStateParam('ignoredViolations', preg_replace('/\s+/', ',', $match[2] ?? '') ?? '');
             $this->setStateParam('ignoredType', trim($match[1], '-'));
         } else {
             $this->setStateParam('ignoredViolations', null);
diff --git a/tests/Token/Tokenizer/Fixtures/ignored_violations.twig b/tests/Token/Tokenizer/Fixtures/ignored_violations.twig
index 858ef03a..01762284 100644
--- a/tests/Token/Tokenizer/Fixtures/ignored_violations.twig
+++ b/tests/Token/Tokenizer/Fixtures/ignored_violations.twig
@@ -3,3 +3,7 @@
 {# twig-cs-fixer-disable-line Foo.Bar #}
 {# twig-cs-fixer-disable-next-line Foo.Bar Bar.Foo #}
 {# twig-cs-fixer-disable-next-line #}
+
+# twig-cs-fixer-disable-next-line
+{{ # twig-cs-fixer-disable-next-line
+}}
diff --git a/tests/Token/Tokenizer/Fixtures/invalid5.twig b/tests/Token/Tokenizer/Fixtures/invalid5.twig
index bd96ad81..bbbd6d54 100644
--- a/tests/Token/Tokenizer/Fixtures/invalid5.twig
+++ b/tests/Token/Tokenizer/Fixtures/invalid5.twig
@@ -1 +1 @@
-{{ "#{p.first #{}}" }}
+{# foo
diff --git a/tests/Token/Tokenizer/Fixtures/invalid6.twig b/tests/Token/Tokenizer/Fixtures/invalid6.twig
deleted file mode 100644
index bbbd6d54..00000000
--- a/tests/Token/Tokenizer/Fixtures/invalid6.twig
+++ /dev/null
@@ -1 +0,0 @@
-{# foo
diff --git a/tests/Token/Tokenizer/Fixtures/test17.twig b/tests/Token/Tokenizer/Fixtures/test17.twig
new file mode 100644
index 00000000..1e97e8e0
--- /dev/null
+++ b/tests/Token/Tokenizer/Fixtures/test17.twig
@@ -0,0 +1,4 @@
+{{
+    # Inline comment
+    foo
+}}
diff --git a/tests/Token/Tokenizer/TokenizerTest.php b/tests/Token/Tokenizer/TokenizerTest.php
index e6709790..c69a9d93 100644
--- a/tests/Token/Tokenizer/TokenizerTest.php
+++ b/tests/Token/Tokenizer/TokenizerTest.php
@@ -116,6 +116,7 @@ public function testTokenizeIgnoredViolations(): void
                 'Foo.Bar:5',
                 'Bar.Foo:5',
                 ':6',
+                ':9',
             ],
             array_map(
                 static fn (ViolationId $validationId) => $validationId->toString(),
@@ -814,6 +815,27 @@ public static function tokenizeDataProvider(): iterable
                 19 => Token::EOF_TYPE,
             ],
         ];
+
+        yield [
+            __DIR__.'/Fixtures/test17.twig',
+            [
+                0 => Token::VAR_START_TYPE,
+                1 => Token::EOL_TYPE,
+                2 => Token::WHITESPACE_TYPE,
+                3 => Token::INLINE_COMMENT_START_TYPE,
+                4 => Token::INLINE_COMMENT_WHITESPACE_TYPE,
+                5 => Token::INLINE_COMMENT_TEXT_TYPE,
+                6 => Token::INLINE_COMMENT_WHITESPACE_TYPE,
+                7 => Token::INLINE_COMMENT_TEXT_TYPE,
+                8 => Token::EOL_TYPE,
+                9 => Token::WHITESPACE_TYPE,
+                10 => Token::NAME_TYPE,
+                11 => Token::EOL_TYPE,
+                12 => Token::VAR_END_TYPE,
+                13 => Token::EOL_TYPE,
+                14 => Token::EOF_TYPE,
+            ],
+        ];
     }
 
     /**
@@ -842,7 +864,6 @@ public static function tokenizeInvalidDataProvider(): iterable
         yield [__DIR__.'/Fixtures/invalid2.twig', 'Unexpected character "&" at line 4.'];
         yield [__DIR__.'/Fixtures/invalid3.twig', 'Unclosed "(" at line 1.'];
         yield [__DIR__.'/Fixtures/invalid4.twig', 'Unexpected character ")" at line 1.'];
-        yield [__DIR__.'/Fixtures/invalid5.twig', 'Unexpected character "#" at line 1.'];
-        yield [__DIR__.'/Fixtures/invalid6.twig', 'Unclosed comment at line 1.'];
+        yield [__DIR__.'/Fixtures/invalid5.twig', 'Unclosed comment at line 1.'];
     }
 }