From 1e57e719d72be9f6e15133b1997f675e34b26881 Mon Sep 17 00:00:00 2001 From: Rahul Dey Date: Sun, 9 Jun 2024 18:12:46 +0530 Subject: [PATCH] styles fiexed :lipstick: --- src/Bpe.php | 10 +-- src/Contracts/BpeContract.php | 4 +- src/Encoder.php | 40 +++++------ .../OpenAiPublic/AbstractEncoding.php | 2 - .../OpenAiPublic/Cl100KBaseEncoding.php | 2 +- src/Encodings/OpenAiPublic/Gpt2Encoding.php | 4 +- .../OpenAiPublic/P50KBaseEncoding.php | 4 +- .../OpenAiPublic/P50KEditEncoding.php | 4 +- .../OpenAiPublic/R50KBaseEncoding.php | 4 +- src/Enums/SpecialToken.php | 4 +- src/Loaders/DataGymLoader.php | 5 +- src/Loaders/TiktokenLoader.php | 5 +- src/Readers/HttpReader.php | 4 +- src/Registry.php | 4 +- src/Utils/ArrayUtil.php | 15 ++-- src/Utils/EncoderUtil.php | 4 +- src/Vocab.php | 2 +- tests/Bpe.php | 70 +++++++++---------- tests/Encoder.php | 12 ++-- tests/Loaders/Loader.php | 3 +- tests/Registry.php | 10 +-- tests/Tiktoken.php | 4 +- tests/Vocab.php | 2 +- 23 files changed, 94 insertions(+), 124 deletions(-) diff --git a/src/Bpe.php b/src/Bpe.php index ea36136..39013af 100644 --- a/src/Bpe.php +++ b/src/Bpe.php @@ -17,9 +17,8 @@ final class Bpe implements BpeContract private readonly string $specialRegex; /** - * @param Vocab $vocab - * @param array $specialTokens - * @param string $regex + * @param array $specialTokens + * * @throws Exceptions\InvalidPatternException */ public function __construct( @@ -31,7 +30,7 @@ public function __construct( } /** - * @inheritDoc + * {@inheritDoc} */ public function encode(string $text, array $allowedSpecial): array { @@ -98,7 +97,8 @@ public function encode(string $text, array $allowedSpecial): array } /** - * @inheritDoc + * {@inheritDoc} + * * @throws Exception */ public function encodeOrdinary(string $text): array diff --git a/src/Contracts/BpeContract.php b/src/Contracts/BpeContract.php index c4146f9..a20152d 100644 --- a/src/Contracts/BpeContract.php +++ b/src/Contracts/BpeContract.php @@ -7,14 +7,12 @@ interface BpeContract { /** - * @param string $text - * @param string[] $allowedSpecial + * @param string[] $allowedSpecial * @return array{0: int[], 1, int} */ public function encode(string $text, array $allowedSpecial): array; /** - * @param string $text * @return int[] */ public function encodeOrdinary(string $text): array; diff --git a/src/Encoder.php b/src/Encoder.php index c1a407a..6cb4360 100644 --- a/src/Encoder.php +++ b/src/Encoder.php @@ -14,12 +14,7 @@ class Encoder protected int $maxTokenValue; /** - * @param string $name - * @param string $pattern - * @param Vocab $vocab - * @param array $specialTokens - * @param int|null $vocabLength - * @param BpeContract|null $bpe + * @param array $specialTokens */ public function __construct( public readonly string $name, @@ -29,14 +24,14 @@ public function __construct( public readonly ?int $vocabLength = null, protected ?BpeContract $bpe = null, ) { - if(is_null($this->bpe)) { + if (is_null($this->bpe)) { $this->initializeBpe(); } } /** - * @param string $text * @return int[] + * * @throws \Exception */ public function encodeOrdinary(string $text): array @@ -45,8 +40,9 @@ public function encodeOrdinary(string $text): array } /** - * @param string[] $texts + * @param string[] $texts * @return array + * * @throws \Exception */ public function encodeOrdinaryBatch(array $texts): array @@ -61,10 +57,9 @@ public function encodeOrdinaryBatch(array $texts): array } /** - * @param string $text - * @param string[]|'all' $allowedSpecial - * @param string $disallowedSpecial + * @param string[]|'all' $allowedSpecial * @return int[] + * * @throws Exceptions\InvalidPatternException * @throws SpecialTokenNotAllowedException */ @@ -91,10 +86,10 @@ public function encode(string $text, array|string $allowedSpecial = [], string $ } /** - * @param array $texts - * @param string[]|'all' $allowedSpecial - * @param string $disallowedSpecial + * @param array $texts + * @param string[]|'all' $allowedSpecial * @return array + * * @throws Exceptions\InvalidPatternException * @throws SpecialTokenNotAllowedException */ @@ -110,8 +105,8 @@ public function encodeBatch(array $texts, array|string $allowedSpecial = [], str } /** - * @param int[] $tokens - * @return string + * @param int[] $tokens + * * @throws RankNotFoundException */ public function decode(array $tokens): string @@ -119,12 +114,12 @@ public function decode(array $tokens): string $text = ''; foreach ($tokens as $token) { - try{ + try { $text .= $this->vocab->getToken($token); - }catch (RankNotFoundException $exception){ + } catch (RankNotFoundException $exception) { $piece = array_search($token, $this->specialTokens); - if(! $piece) { + if (! $piece) { throw $exception; } @@ -136,8 +131,9 @@ public function decode(array $tokens): string } /** - * @param array $batch + * @param array $batch * @return array + * * @throws RankNotFoundException */ public function decodeBatch(array $batch): array @@ -183,7 +179,7 @@ public function setBpe(Bpe $bpe): void public function getBpe(): BpeContract { - if(is_null($this->bpe)) { + if (is_null($this->bpe)) { throw new \Exception('Bpe Not Found'); } diff --git a/src/Encodings/OpenAiPublic/AbstractEncoding.php b/src/Encodings/OpenAiPublic/AbstractEncoding.php index 9e6ef76..613a874 100644 --- a/src/Encodings/OpenAiPublic/AbstractEncoding.php +++ b/src/Encodings/OpenAiPublic/AbstractEncoding.php @@ -6,7 +6,6 @@ use Rahul900day\Tiktoken\Contracts\EncodingContract; use Rahul900day\Tiktoken\Encoder; -use Rahul900day\Tiktoken\Enums\SpecialToken; use Rahul900day\Tiktoken\Vocab; abstract class AbstractEncoding implements EncodingContract @@ -34,5 +33,4 @@ public function __invoke(): Encoder $this->vocabLength, ); } - } diff --git a/src/Encodings/OpenAiPublic/Cl100KBaseEncoding.php b/src/Encodings/OpenAiPublic/Cl100KBaseEncoding.php index 04c1823..775f3d1 100644 --- a/src/Encodings/OpenAiPublic/Cl100KBaseEncoding.php +++ b/src/Encodings/OpenAiPublic/Cl100KBaseEncoding.php @@ -29,7 +29,7 @@ protected function getPattern(): string } /** - * @inheritDoc + * {@inheritDoc} */ protected function getSpecialTokens(): array { diff --git a/src/Encodings/OpenAiPublic/Gpt2Encoding.php b/src/Encodings/OpenAiPublic/Gpt2Encoding.php index 9c1ee3a..b2f0225 100644 --- a/src/Encodings/OpenAiPublic/Gpt2Encoding.php +++ b/src/Encodings/OpenAiPublic/Gpt2Encoding.php @@ -4,8 +4,6 @@ namespace Rahul900day\Tiktoken\Encodings\OpenAiPublic; -use Rahul900day\Tiktoken\Contracts\EncodingContract; -use Rahul900day\Tiktoken\Encoder; use Rahul900day\Tiktoken\Enums\SpecialToken; use Rahul900day\Tiktoken\Loaders\DataGymLoader; use Rahul900day\Tiktoken\Readers\HttpReader; @@ -33,7 +31,7 @@ protected function getPattern(): string } /** - * @inheritDoc + * {@inheritDoc} */ protected function getSpecialTokens(): array { diff --git a/src/Encodings/OpenAiPublic/P50KBaseEncoding.php b/src/Encodings/OpenAiPublic/P50KBaseEncoding.php index 8289bd4..cb3848f 100644 --- a/src/Encodings/OpenAiPublic/P50KBaseEncoding.php +++ b/src/Encodings/OpenAiPublic/P50KBaseEncoding.php @@ -4,8 +4,6 @@ namespace Rahul900day\Tiktoken\Encodings\OpenAiPublic; -use Rahul900day\Tiktoken\Contracts\EncodingContract; -use Rahul900day\Tiktoken\Encoder; use Rahul900day\Tiktoken\Enums\SpecialToken; use Rahul900day\Tiktoken\Loaders\TiktokenLoader; use Rahul900day\Tiktoken\Readers\HttpReader; @@ -33,7 +31,7 @@ protected function getPattern(): string } /** - * @inheritDoc + * {@inheritDoc} */ protected function getSpecialTokens(): array { diff --git a/src/Encodings/OpenAiPublic/P50KEditEncoding.php b/src/Encodings/OpenAiPublic/P50KEditEncoding.php index e44e2ce..065edb7 100644 --- a/src/Encodings/OpenAiPublic/P50KEditEncoding.php +++ b/src/Encodings/OpenAiPublic/P50KEditEncoding.php @@ -4,8 +4,6 @@ namespace Rahul900day\Tiktoken\Encodings\OpenAiPublic; -use Rahul900day\Tiktoken\Contracts\EncodingContract; -use Rahul900day\Tiktoken\Encoder; use Rahul900day\Tiktoken\Enums\SpecialToken; use Rahul900day\Tiktoken\Loaders\TiktokenLoader; use Rahul900day\Tiktoken\Readers\HttpReader; @@ -31,7 +29,7 @@ protected function getPattern(): string } /** - * @inheritDoc + * {@inheritDoc} */ protected function getSpecialTokens(): array { diff --git a/src/Encodings/OpenAiPublic/R50KBaseEncoding.php b/src/Encodings/OpenAiPublic/R50KBaseEncoding.php index 48c83cb..81cb172 100644 --- a/src/Encodings/OpenAiPublic/R50KBaseEncoding.php +++ b/src/Encodings/OpenAiPublic/R50KBaseEncoding.php @@ -4,8 +4,6 @@ namespace Rahul900day\Tiktoken\Encodings\OpenAiPublic; -use Rahul900day\Tiktoken\Contracts\EncodingContract; -use Rahul900day\Tiktoken\Encoder; use Rahul900day\Tiktoken\Enums\SpecialToken; use Rahul900day\Tiktoken\Loaders\TiktokenLoader; use Rahul900day\Tiktoken\Readers\HttpReader; @@ -33,7 +31,7 @@ protected function getPattern(): string } /** - * @inheritDoc + * {@inheritDoc} */ protected function getSpecialTokens(): array { diff --git a/src/Enums/SpecialToken.php b/src/Enums/SpecialToken.php index 6c2aa42..2cb3359 100644 --- a/src/Enums/SpecialToken.php +++ b/src/Enums/SpecialToken.php @@ -16,8 +16,8 @@ enum SpecialToken: string case ENDOFPROMPT = '<|endofprompt|>'; /** - * @param array $tokens - * @return string + * @param array $tokens + * * @throws InvalidPatternException */ public static function getRegex(array $tokens): string diff --git a/src/Loaders/DataGymLoader.php b/src/Loaders/DataGymLoader.php index 1e1f779..ed10164 100644 --- a/src/Loaders/DataGymLoader.php +++ b/src/Loaders/DataGymLoader.php @@ -10,11 +10,8 @@ final class DataGymLoader extends Loader { /** - * @param string $vocabBpeFile - * @param string $encoderJsonFile - * @param string|null $vocabBpeHash - * @param string|null $encoderJsonHash * @return non-empty-array + * * @throws \Rahul900day\Tiktoken\Exceptions\InvalidChecksumException */ public function load( diff --git a/src/Loaders/TiktokenLoader.php b/src/Loaders/TiktokenLoader.php index 9e0cb74..db47611 100644 --- a/src/Loaders/TiktokenLoader.php +++ b/src/Loaders/TiktokenLoader.php @@ -7,9 +7,8 @@ final class TiktokenLoader extends Loader { /** - * @param string $bpeFile - * @param string|null $expectedHash * @return non-empty-array + * * @throws \Rahul900day\Tiktoken\Exceptions\InvalidChecksumException */ public function load(string $bpeFile, ?string $expectedHash = null): array @@ -26,7 +25,7 @@ public function load(string $bpeFile, ?string $expectedHash = null): array $result[base64_decode($token)] = intval($rank); } - if(count($result) === 0) { + if (count($result) === 0) { throw new \Exception('Invalid tiktoken'); } diff --git a/src/Readers/HttpReader.php b/src/Readers/HttpReader.php index 9232a05..d2f043e 100644 --- a/src/Readers/HttpReader.php +++ b/src/Readers/HttpReader.php @@ -26,9 +26,9 @@ public static function create(?ClientInterface $client = null): HttpReader public function read(string|RequestInterface $location): string { - if(is_string($location)) { + if (is_string($location)) { $request = (new Psr17Factory())->createRequest('GET', $location); - }else { + } else { $request = $location; } diff --git a/src/Registry.php b/src/Registry.php index 911ba98..3cae770 100644 --- a/src/Registry.php +++ b/src/Registry.php @@ -22,7 +22,7 @@ class Registry /** @var array */ protected static array $encodings = []; - /** @var non-empty-array */ + /** @var non-empty-array */ public static array $defaultEncodings = [ 'gpt2' => [Gpt2Encoding::class, []], 'r50k_base' => [R50KBaseEncoding::class, []], @@ -33,7 +33,7 @@ class Registry protected static function registerEncoding(string $name, EncodingContract|Closure $encoding): void { - if(isset(self::$resolvedEncodings[$name])) { + if (isset(self::$resolvedEncodings[$name])) { unset(self::$resolvedEncodings[$name]); } diff --git a/src/Utils/ArrayUtil.php b/src/Utils/ArrayUtil.php index 439cdba..8c41d7c 100644 --- a/src/Utils/ArrayUtil.php +++ b/src/Utils/ArrayUtil.php @@ -11,9 +11,8 @@ final class ArrayUtil /** * @template TKey * @template TValue - * @param array $array - * @param int $at - * @return mixed + * + * @param array $array */ public static function &at(array &$array, int $at): mixed { @@ -25,9 +24,8 @@ public static function &at(array &$array, int $at): mixed /** * @template TKey * @template TValue - * @param array $array - * @param int $at - * @return void + * + * @param array $array */ public static function unsetAt(array &$array, int $at): void { @@ -39,9 +37,8 @@ public static function unsetAt(array &$array, int $at): void /** * @template TKey * @template TValue - * @param non-empty-array $array - * @param int $start - * @param int $end + * + * @param non-empty-array $array * @return array */ public static function getSegment(array $array, int $start, int $end): array diff --git a/src/Utils/EncoderUtil.php b/src/Utils/EncoderUtil.php index 8e7b244..a340ee7 100644 --- a/src/Utils/EncoderUtil.php +++ b/src/Utils/EncoderUtil.php @@ -7,7 +7,6 @@ final class EncoderUtil { /** - * @param string $string * @return array */ public static function toBytes(string $string): array @@ -16,8 +15,7 @@ public static function toBytes(string $string): array } /** - * @param array $bytes - * @return string + * @param array $bytes */ public static function fromBytes(array $bytes): string { diff --git a/src/Vocab.php b/src/Vocab.php index 4509e0b..273a73b 100644 --- a/src/Vocab.php +++ b/src/Vocab.php @@ -15,7 +15,7 @@ final class Vocab implements Countable public readonly array $rankToTokens; /** - * @param non-empty-array $tokenToRanks + * @param non-empty-array $tokenToRanks */ public function __construct(public readonly array $tokenToRanks) { diff --git a/tests/Bpe.php b/tests/Bpe.php index 2a18362..23692d3 100644 --- a/tests/Bpe.php +++ b/tests/Bpe.php @@ -5,43 +5,43 @@ test('simple', function () { $encoding = Tiktoken::getEncoding('gpt2'); - expect($encoding->encode("hello world"))->toBe([31373, 995]) + expect($encoding->encode('hello world'))->toBe([31373, 995]) ->and($encoding->decode([31373, 995]))->toBe('hello world') - ->and($encoding->encode("hello <|endoftext|>", allowedSpecial: 'all'))->toBe([31373, 220, 50256]); + ->and($encoding->encode('hello <|endoftext|>', allowedSpecial: 'all'))->toBe([31373, 220, 50256]); $encoding = Tiktoken::getEncoding('cl100k_base'); - expect($encoding->encode("hello world"))->toBe([15339, 1917]) + expect($encoding->encode('hello world'))->toBe([15339, 1917]) ->and($encoding->decode([15339, 1917]))->toBe('hello world') - ->and($encoding->encode("hello <|endoftext|>", allowedSpecial: 'all'))->toBe([15339, 220, 100257]); + ->and($encoding->encode('hello <|endoftext|>', allowedSpecial: 'all'))->toBe([15339, 220, 100257]); }); test('simple repeated', function () { $encoding = Tiktoken::getEncoding('gpt2'); - expect($encoding->encode("0"))->toBe([15]) - ->and($encoding->encode("00"))->toBe([405]) - ->and($encoding->encode("000"))->toBe([830]) - ->and($encoding->encode("0000"))->toBe([2388]) - ->and($encoding->encode("00000"))->toBe([20483]) - ->and($encoding->encode("000000"))->toBe([10535]) - ->and($encoding->encode("0000000"))->toBe([24598]) - ->and($encoding->encode("00000000"))->toBe([8269]) - ->and($encoding->encode("000000000"))->toBe([10535, 830]) - ->and($encoding->encode("0000000000"))->toBe([8269, 405]) - ->and($encoding->encode("00000000000"))->toBe([8269, 830]) - ->and($encoding->encode("000000000000"))->toBe([8269, 2388]) - ->and($encoding->encode("0000000000000"))->toBe([8269, 20483]) - ->and($encoding->encode("00000000000000"))->toBe([8269, 10535]) - ->and($encoding->encode("000000000000000"))->toBe([8269, 24598]) - ->and($encoding->encode("0000000000000000"))->toBe([25645]) - ->and($encoding->encode("00000000000000000"))->toBe([8269, 10535, 830]); + expect($encoding->encode('0'))->toBe([15]) + ->and($encoding->encode('00'))->toBe([405]) + ->and($encoding->encode('000'))->toBe([830]) + ->and($encoding->encode('0000'))->toBe([2388]) + ->and($encoding->encode('00000'))->toBe([20483]) + ->and($encoding->encode('000000'))->toBe([10535]) + ->and($encoding->encode('0000000'))->toBe([24598]) + ->and($encoding->encode('00000000'))->toBe([8269]) + ->and($encoding->encode('000000000'))->toBe([10535, 830]) + ->and($encoding->encode('0000000000'))->toBe([8269, 405]) + ->and($encoding->encode('00000000000'))->toBe([8269, 830]) + ->and($encoding->encode('000000000000'))->toBe([8269, 2388]) + ->and($encoding->encode('0000000000000'))->toBe([8269, 20483]) + ->and($encoding->encode('00000000000000'))->toBe([8269, 10535]) + ->and($encoding->encode('000000000000000'))->toBe([8269, 24598]) + ->and($encoding->encode('0000000000000000'))->toBe([25645]) + ->and($encoding->encode('00000000000000000'))->toBe([8269, 10535, 830]); }); test('simple regex', function () { $encoding = Tiktoken::getEncoding('cl100k_base'); - expect($encoding->encode("rer"))->toBe([38149]) + expect($encoding->encode('rer'))->toBe([38149]) ->and($encoding->encode("'rer"))->toBe([2351, 81]) ->and($encoding->encode("today\n "))->toBe([31213, 198, 220]) ->and($encoding->encode("today\n \n"))->toBe([31213, 27907]) @@ -50,34 +50,34 @@ test('basic encode', function () { $encoding = Tiktoken::getEncoding('r50k_base'); - expect($encoding->encode("hello world"))->toBe([31373, 995]); + expect($encoding->encode('hello world'))->toBe([31373, 995]); $encoding = Tiktoken::getEncoding('p50k_base'); - expect($encoding->encode("hello world"))->toBe([31373, 995]); + expect($encoding->encode('hello world'))->toBe([31373, 995]); $encoding = Tiktoken::getEncoding('cl100k_base'); - expect($encoding->encode("hello world"))->toBe([15339, 1917]); + expect($encoding->encode('hello world'))->toBe([15339, 1917]); -// ->and($encoding->encode(' \x850'))->toBe([220, 126, 227, 15]) + // ->and($encoding->encode(' \x850'))->toBe([220, 126, 227, 15]) }); test('encode empty', function () { $encoding = Tiktoken::getEncoding('r50k_base'); - expect($encoding->encode(""))->toBe([]); + expect($encoding->encode(''))->toBe([]); }); test('basic round-trip', function (string $encodingName) { $encoding = Tiktoken::getEncoding($encodingName); $values = [ - "hello", - "hello ", - "hello ", - " hello", - " hello ", - " hello ", - "hello world", - "请考试我的软件!12345", + 'hello', + 'hello ', + 'hello ', + ' hello', + ' hello ', + ' hello ', + 'hello world', + '请考试我的软件!12345', ]; foreach ($values as $value) { diff --git a/tests/Encoder.php b/tests/Encoder.php index 40e9089..a464a36 100644 --- a/tests/Encoder.php +++ b/tests/Encoder.php @@ -30,13 +30,13 @@ ->andReturn([[1, 2, 3], 0]) ->once(); - $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); + $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); expect($encoder->encode('Fake'))->toBe([1, 2, 3]); }); it('can decode ranks', function () { - $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); + $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); expect($encoder->decode([1, 2, 3]))->toBe('abc'); }); @@ -108,13 +108,13 @@ ->andReturn([[1, 2, 3], 0], [[4, 5, 6], 0]) ->twice(); - $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); + $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); expect($encoder->encodeBatch(['Fake', 'Fake1']))->toBe([[1, 2, 3], [4, 5, 6]]); }); it('can decode batch', function () { - $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); + $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); expect($encoder->decodeBatch([[1, 2, 3], [4, 5, 6]]))->toBe(['abc', 'def']); }); @@ -124,7 +124,7 @@ ->andReturn([1, 2, 3]) ->once(); - $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); + $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); expect($encoder->encodeOrdinary('Fake'))->toBe([1, 2, 3]); }); @@ -134,7 +134,7 @@ ->andReturn([1, 2, 3], [4, 5, 6]) ->twice(); - $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); + $encoder = new Encoder('gpt-fake', '', $this->vocab, [], 10, $this->bpe); expect($encoder->encodeOrdinaryBatch(['Fake', 'Fake1']))->toBe([[1, 2, 3], [4, 5, 6]]); }); diff --git a/tests/Loaders/Loader.php b/tests/Loaders/Loader.php index 33581dd..5fa7b15 100644 --- a/tests/Loaders/Loader.php +++ b/tests/Loaders/Loader.php @@ -9,7 +9,8 @@ $this->reader = mock(ReaderContract::class); $this->cache = mock(CacheContract::class); - $this->loader = new class($this->reader, $this->cache) extends Loader { + $this->loader = new class($this->reader, $this->cache) extends Loader + { public function load(string $path, ?string $hash = null) { return $this->readFileCached($path, $hash); diff --git a/tests/Registry.php b/tests/Registry.php index e9081d9..c111c5b 100644 --- a/tests/Registry.php +++ b/tests/Registry.php @@ -1,11 +1,7 @@ not->toBe(spl_object_hash($encoder2)); - Registry::registerCustomEncoding('gpt-fake', fn() => $encoder1); + Registry::registerCustomEncoding('gpt-fake', fn () => $encoder1); expect(spl_object_hash(Registry::getEncoding('gpt-fake'))) ->toBe(spl_object_hash($encoder1)); - Registry::registerCustomEncoding('gpt-fake', fn() => $encoder2); + Registry::registerCustomEncoding('gpt-fake', fn () => $encoder2); expect(spl_object_hash(Registry::getEncoding('gpt-fake'))) ->toBe(spl_object_hash($encoder2)); }); - - diff --git a/tests/Tiktoken.php b/tests/Tiktoken.php index 7449df5..683f1f1 100644 --- a/tests/Tiktoken.php +++ b/tests/Tiktoken.php @@ -10,7 +10,7 @@ expect($encoding)->toBeInstanceOf(Encoder::class); })->with([ 'cl100k_base', - 'gpt2' + 'gpt2', ]); it('can give encoding from model name', function (string $modelName) { @@ -21,7 +21,7 @@ 'gpt-4', 'gpt-2', 'text-davinci-003', - 'code-search-ada-code-001' + 'code-search-ada-code-001', ]); it('can give encoding from model prefix', function (string $modelName) { diff --git a/tests/Vocab.php b/tests/Vocab.php index 4e56b08..2212d09 100644 --- a/tests/Vocab.php +++ b/tests/Vocab.php @@ -22,7 +22,7 @@ ['c', 2], [' ', 3], [2, 60], - ['x', null] + ['x', null], ]); it('can give token', function (string $token, int $rank) {