diff --git a/src/Smalot/PdfParser/Encryption/DecryptionError.php b/src/Smalot/PdfParser/Encryption/DecryptionError.php new file mode 100644 index 00000000..31a0f2aa --- /dev/null +++ b/src/Smalot/PdfParser/Encryption/DecryptionError.php @@ -0,0 +1,49 @@ + + * + * @date 2024-01-12 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. + * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see . + */ + +namespace Smalot\PdfParser\Encryption; + +/** + * Thrown when the encrypted file key cannot be decrypted (raised by + * NewFileKey::makeFileKeyNew() when openssl_decrypt() reports failure). + */ +class DecryptionError extends \RuntimeException +{ +} diff --git a/src/Smalot/PdfParser/Encryption/FileKey.php b/src/Smalot/PdfParser/Encryption/FileKey.php new file mode 100644 index 00000000..0c6a8478 --- /dev/null +++ b/src/Smalot/PdfParser/Encryption/FileKey.php @@ -0,0 +1,455 @@ + + * + * @date 2024-01-12 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. + * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see . + */ + +namespace Smalot\PdfParser\Encryption; +
/**
 * Creates the file's decryption key from the info about the file and
 * optionally the owner and/or user password. Doesn't call
 * Info::getEncAlgorithm(), but figures out what to do based on the encryption
 * version and revision instead.
 */
abstract class FileKey
{
    /**
     * Declared explicitly: creating properties dynamically is deprecated
     * since PHP 8.2.
     *
     * @var Info parsed encryption information for the document
     */
    protected $info;

    /**
     * @var string|null the derived file key (byte string); stays null until a
     *                  concrete subclass has derived it
     */
    protected $fileKey = null;

    /**
     * @param Info $info parsed encryption information for the document
     */
    public function __construct(Info $info)
    {
        $this->info = $info;
    }

    /**
     * @return string|null the file key stored by the concrete subclass
     */
    public function getKey()
    {
        return $this->fileKey;
    }

    /**
     * Creates the file's decryption key. Internally, creates an instance of
     * the appropriate child class, selected by the encryption revision.
     *
     * @param Info        $info          parsed encryption information
     * @param string|null $ownerPassword owner password, or null to skip it
     * @param string|null $userPassword  user password, or null for the empty password
     *
     * @return string|null file key as a byte string
     *
     * @throws InvalidRevision if the revision is not 2, 3, 5 or 6
     */
    public static function generate(Info $info, $ownerPassword = null, $userPassword = null)
    {
        // Create an instance of the appropriate child class.
        switch ($info->getRevision()) {
            case 2:
            case 3:
                $helper = new OldFileKey($info, $ownerPassword, $userPassword);
                break;

            case 5:
            case 6:
                $helper = new NewFileKey($info, $ownerPassword, $userPassword);
                break;

            default:
                throw new InvalidRevision("Unsupported revision in makeFileKey()");
        }

        return $helper->getKey();
    }
}


+/** + * Handles file keys for encryption revisions 2 and 3.
+ */
class OldFileKey extends FileKey
{
    /**
     * @var string the 32-byte string used to pad (or stand in for) passwords
     */
    protected $passwordPadding;

    /**
     * @var string|null owner password, truncated to 32 bytes
     */
    protected $ownerPassword;

    /**
     * @var string|null user password, truncated to 32 bytes
     */
    protected $userPassword;

    /**
     * Tries the owner password first (when given), then the user password
     * (the empty password when none is given), and stores the first file key
     * that validates against the user key from the file.
     *
     * @param Info        $info          parsed encryption information
     * @param string|null $ownerPassword owner password, or null to skip it
     * @param string|null $userPassword  user password, or null for the empty password
     *
     * @throws InvalidPassword if neither of the supplied passwords are valid
     */
    public function __construct(Info $info, $ownerPassword = null, $userPassword = null)
    {
        parent::__construct($info);

        $passwordPaddingBytes = [
            0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41,
            0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08,
            0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
            0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a,
        ];
        $this->passwordPadding = \implode('', \array_map('chr', $passwordPaddingBytes));

        // Only the first 32 bytes of a password are significant.
        $this->ownerPassword = (\is_string($ownerPassword) && \strlen($ownerPassword) > 32)
            ? \substr($ownerPassword, 0, 32)
            : $ownerPassword;
        $this->userPassword = (\is_string($userPassword) && \strlen($userPassword) > 32)
            ? \substr($userPassword, 0, 32)
            : $userPassword;

        $this->fileKey = null;

        if (null !== $this->ownerPassword) {
            // The owner password only serves to recover the user password.
            $recovered = $this->decryptPassword($this->ownerPassword);
            if (\is_string($recovered)) {
                $candidate = $this->makeFileKeyOld($recovered);
                if ($this->testFileKey($candidate)) {
                    $this->fileKey = $candidate;
                }
            }
        }

        // If owner password was missing or invalid, try user password
        if (null === $this->fileKey) {
            $password = null === $this->userPassword ? '' : $this->userPassword;
            $candidate = $this->makeFileKeyOld($password);
            if (!$this->testFileKey($candidate)) {
                throw new InvalidPassword();
            }
            $this->fileKey = $candidate;
        }
    }


    /**
     * Generate the user password by creating a key by hashing the supplied
     * owner password and using it to decrypt the owner key from the file data.
     *
     * The RC4 key is the first n bytes of the MD5 digest, n being the file key
     * length reported by Info (previously the full 16-byte digest was used,
     * which only matches for 128-bit keys).
     *
     * @param string $password owner password
     *
     * @return string|false decrypted owner key, or false if OpenSSL fails
     *
     * @throws InvalidRevision if the revision is neither 2 nor 3
     */
    public function decryptPassword(string $password)
    {
        // Pad with the standard padding string, or truncate, to exactly 32 bytes.
        $data = \substr($password.$this->passwordPadding, 0, 32);
        $hash = \md5($data, true);
        $keyLength = (int) $this->info->getFileKeyLength();
        // NOTE(review): "RC4-40" with keys longer than 5 bytes relies on PHP's
        // OpenSSL binding extending the key length to the supplied key - confirm.
        $options = \OPENSSL_RAW_DATA | \OPENSSL_ZERO_PADDING;

        switch ($this->info->getRevision()) {
            case 2:
                return \openssl_decrypt(
                    $this->info->getOwnerKey(),
                    'RC4-40',
                    \substr($hash, 0, $keyLength),
                    $options
                );

            case 3:
                for ($round = 0; $round < 50; ++$round) {
                    $hash = \md5($hash, true);
                }

                return $this->magicDecrypt($this->info->getOwnerKey(), \substr($hash, 0, $keyLength));

            default:
                throw new InvalidRevision("Unsupported revision in decryptPassword()");
        }
    }


    /**
     * Derive the file key from a user password: MD5 over the padded password,
     * the owner key, the low 4 bytes of the permissions, the document ID and,
     * when metadata is not encrypted, 4 bytes built from -1. Revision 3
     * re-hashes the first n bytes of the digest 50 more times.
     *
     * @param string $password user password (at most 32 bytes are used)
     *
     * @return string file key of Info::getFileKeyLength() bytes
     */
    public function makeFileKeyOld($password)
    {
        $keyLength = (int) $this->info->getFileKeyLength();
        $permBytes = \Smalot\PdfParser\Utils::lowestBytesStr($this->info->getPerms(), 4);

        // Pad with the standard padding string, or truncate, to exactly 32 bytes.
        $data = \substr($password.$this->passwordPadding, 0, 32)
            .$this->info->getOwnerKey()
            .$permBytes
            .$this->info->getDocID();
        if (!$this->info->getEncryptMetadata()) {
            $data .= \Smalot\PdfParser\Utils::lowestBytesStr(-1, 4);
        }

        $hash = \md5($data, true);
        if (3 === (int) $this->info->getRevision()) {
            for ($round = 0; $round < 50; ++$round) {
                // Only the first n bytes of the previous digest are re-hashed.
                $hash = \md5(\substr($hash, 0, $keyLength), true);
            }
        }

        return \substr($hash, 0, $keyLength);
    }


    /**
     * Check that the file key is valid.
     *
     * @param string $key candidate file key
     *
     * @return bool
     *
     * @throws InvalidRevision if the revision is neither 2 nor 3
     */
    public function testFileKey(string $key)
    {
        switch ($this->info->getRevision()) {
            case 2:
                // Decrypt the user key and see if it matches the padding string
                $plaintext = \openssl_decrypt(
                    $this->info->getUserKey(),
                    'RC4-40',
                    $key,
                    \OPENSSL_RAW_DATA | \OPENSSL_ZERO_PADDING
                );

                return $plaintext === $this->passwordPadding;

            case 3:
                // Decrypt the user key 20 times using a XOR cycling of the key
                // and see if its first 16 bytes match the expected hash
                $data = $this->magicDecrypt($this->info->getUserKey(), $key);
                if (!\is_string($data)) {
                    return false;
                }
                $hash = \md5($this->passwordPadding.$this->info->getDocID(), true);

                // Strict comparison: loose == may compare numeric-looking strings as numbers.
                return \hash_equals($hash, (string) \substr($data, 0, 16));

            default:
                throw new InvalidRevision("Mismatch between caller and testPasswordOld()");
        }
    }


    /**
     * Multi-round basic decryption used by revision 3: 20 RC4 passes, each
     * with every key byte XORed with the round number, counting from 19 to 0.
     *
     * @param string $data data to decrypt
     * @param string $key  base key
     *
     * @return string|false byte string, or false if OpenSSL fails
     */
    public function magicDecrypt(string $data, string $key)
    {
        $keyLength = \strlen($key);
        for ($i = 19; $i >= 0; --$i) {
            $roundKey = $key ^ \str_repeat(\chr($i), $keyLength);
            $data = \openssl_decrypt($data, 'RC4-40', $roundKey, \OPENSSL_RAW_DATA | \OPENSSL_ZERO_PADDING);
            if (false === $data) {
                return false;
            }
        }

        return $data;
    }
}


+/** +* Handles file keys for encryption revisions 5 and 6.
+*/
class NewFileKey extends FileKey
{
    /**
     * @var string|null owner password, truncated to 127 bytes
     */
    protected $ownerPassword;

    /**
     * @var string|null user password, truncated to 127 bytes
     */
    protected $userPassword;

    /**
     * @var int number of completed rounds of the revision 6 hash
     */
    protected $round = 0;

    /**
     * @var string cyphertext produced by the latest round of the revision 6 hash
     */
    protected $cyphertext = '';

    /**
     * Tries the owner password first (when given), then the user password
     * (the empty password when none is given), and decrypts the file key with
     * whichever one validates.
     *
     * @param Info        $info          parsed encryption information
     * @param string|null $ownerPassword owner password, or null to skip it
     * @param string|null $userPassword  user password, or null for the empty password
     *
     * @throws InvalidPassword if neither of the supplied passwords are valid
     * @throws DecryptionError if OpenSSL fails while hashing or decrypting
     */
    public function __construct(Info $info, $ownerPassword = null, $userPassword = null)
    {
        parent::__construct($info);

        $this->ownerPassword = (\is_string($ownerPassword) && \strlen($ownerPassword) > 127)
            ? \substr($ownerPassword, 0, 127)
            : $ownerPassword;
        $this->userPassword = (\is_string($userPassword) && \strlen($userPassword) > 127)
            ? \substr($userPassword, 0, 127)
            : $userPassword;

        // Revision 5 and 6 keys are 48 bytes long
        // Bytes 0-31: password hash
        // Bytes 32-39: password check hash salt
        // Bytes 40-47: file key hash salt
        // Note that when using the owner password, the whole user key
        // is also included in the hash input when checking the
        // password and decrypting the file key (which is encrypted
        // twice, using different intermediate keys, in ownerEnc and
        // userEnc)
        $passwordHash = null;
        if (null !== $this->ownerPassword) {
            $mainKey = $this->info->getOwnerKey();
            // Info accepts keys padded beyond 48 bytes; only the 48 significant
            // bytes of the user key belong in the hash input.
            $additionalKey = \substr($this->info->getUserKey(), 0, 48);
            $password = $this->ownerPassword;
            $encryptedFileKey = $this->info->getOwnerEnc();
            $passwordHash = $this->hashPassword($password, $mainKey, $additionalKey);
            if (!$this->testPasswordHash($passwordHash, $mainKey)) {
                $passwordHash = null;
            }
        }

        if (null === $passwordHash) {
            $mainKey = $this->info->getUserKey();
            $additionalKey = '';
            $password = null === $this->userPassword ? '' : $this->userPassword;
            $encryptedFileKey = $this->info->getUserEnc();
            $passwordHash = $this->hashPassword($password, $mainKey, $additionalKey);
            if (!$this->testPasswordHash($passwordHash, $mainKey)) {
                throw new InvalidPassword();
            }
        }

        $this->fileKey = $this->makeFileKeyNew($password, $mainKey, $additionalKey, $encryptedFileKey);
    }


    /**
     * Make a hash that can be used to check whether a given password matches
     * the one the file was created with.
     *
     * @param string $password      Owner or user password
     * @param string $mainKey       owner or user key from the file
     * @param string $additionalKey 0 or 48 byte string (when using owner password)
     *
     * @return string 32 byte string
     */
    public function hashPassword(string $password, string $mainKey, string $additionalKey)
    {
        // Use first half of latter 16 bytes of $mainKey and all of
        // $additionalKey (if any)
        return $this->intermediateKey($password, \substr($mainKey, 32, 8), $additionalKey);
    }


    /**
     * Check the hash against the first 32 bytes of the key.
     *
     * @param string $passwordHash hash produced by hashPassword()
     * @param string $key          owner or user key from the file
     *
     * @return bool
     */
    public function testPasswordHash(string $passwordHash, string $key)
    {
        // Strict comparison: loose == may compare numeric-looking strings as numbers.
        return \hash_equals((string) \substr($key, 0, 32), $passwordHash);
    }


    /**
     * Decrypt the relevant ...Enc field.
     *
     * @param string $password         Owner or user password
     * @param string $mainKey          owner or user key from the file
     * @param string $additionalKey    0 or 48 byte string (when using owner password)
     * @param string $encryptedFileKey 32 byte string
     *
     * @return string 32 byte string
     *
     * @throws DecryptionError if the password can't decrypt the file key; shouldn't be possible
     */
    public function makeFileKeyNew($password, $mainKey, $additionalKey, $encryptedFileKey)
    {
        // Use second half of latter 16 bytes of $mainKey and all of
        // $additionalKey (if any)
        $key = $this->intermediateKey($password, \substr($mainKey, 40, 8), $additionalKey);
        // NOTE(review): Utils::byteString(16) is presumably 16 zero bytes - confirm.
        $iv = \Smalot\PdfParser\Utils::byteString(16);
        $decryptedKey = \openssl_decrypt(
            $encryptedFileKey,
            'aes-256-cbc',
            $key,
            \OPENSSL_RAW_DATA | \OPENSSL_ZERO_PADDING,
            $iv
        );
        if (false === $decryptedKey) {
            throw new DecryptionError("Decryption failed");
        }

        return $decryptedKey;
    }


    /**
     * Generate a hash that can either be used to decrypt the relevant ...Enc
     * field and get the file key, or verify the password.
     *
     * @param string $password      Owner or user password
     * @param string $saltKey       8 byte string: part of the corresponding key
     * @param string $additionalKey 0 or 48 byte string (when using owner password)
     *
     * @return string 32 byte string
     *
     * @throws InvalidRevision if the revision is neither 5 nor 6
     * @throws DecryptionError if OpenSSL fails during a revision 6 round
     */
    public function intermediateKey($password, $saltKey, $additionalKey)
    {
        $revision = (int) $this->info->getRevision();
        if (5 !== $revision && 6 !== $revision) {
            throw new InvalidRevision("Unsupported revision in intermediateKey()");
        }

        // initial hash; for revision 5 this is the final result
        $hash = \hash('sha256', $password.$saltKey.$additionalKey, true);
        if (5 === $revision) {
            return $hash;
        }

        /* Each round encrypts a plaintext comprising 64 copies of the
         * input data plus the hash of the previous round, using a key
         * and IV derived from the hash of the previous round. Then a
         * hash of that round's cyphertext is produced using a
         * different hash algorithm (selected based on the cyphertext).
         * The number of rounds is not fixed; the decision to continue
         * is based on the cyphertext (see hashContinue()).
         * 32 bytes of the hash produced by the final round is returned.
         */
        $algorithms = ['sha256', 'sha384', 'sha512'];
        $this->round = 0;
        do {
            $blob = \str_repeat($password.$hash.$additionalKey, 64);

            // This is different from the key decryption algorithm
            $cyphertext = \openssl_encrypt(
                $blob,
                'aes-128-cbc',
                \substr($hash, 0, 16),
                \OPENSSL_RAW_DATA | \OPENSSL_ZERO_PADDING,
                \substr($hash, 16, 16)
            );
            if (false === $cyphertext) {
                throw new DecryptionError("Encryption failed while hashing password");
            }
            $this->cyphertext = $cyphertext;

            // The first 16 bytes, read as one big-endian number modulo 3,
            // select the hash algorithm for this round
            $remainder = self::remainderMod3(\substr($cyphertext, 0, 16));
            $hash = \hash($algorithms[$remainder], $cyphertext, true);
            ++$this->round;
        } while ($this->hashContinue());

        return \substr($hash, 0, 32);
    }


    /**
     * Determine whether to continue based on the round count and an arbitrary
     * value within the current round's cyphertext. At least 64 rounds are
     * done; beyond that the limit is the last cyphertext byte plus 32.
     *
     * @return bool
     */
    public function hashContinue()
    {
        $lastByte = \ord(\substr($this->cyphertext, -1));
        $currentLimit = \max(64, $lastByte + 32);

        return $this->round < $currentLimit;
    }


    /**
     * Derive 3 integers from the first 16 bytes of $input.
     * Bytes are treated as being unsigned, in big-endian order.
     *
     * The first element covers 8 bytes, so hexdec() returns an inexact float
     * once the top bit is set; intermediateKey() therefore no longer uses
     * this method. Its behaviour is unchanged for any other caller.
     *
     * @param string $input a byte string of at least 16 characters
     *
     * @return array of 3 numbers (int, or float on overflow)
     */
    public static function extractSplinters($input)
    {
        $result = [];
        $result[] = \hexdec(\bin2hex(\substr($input, 0, 8)));
        $result[] = \hexdec(\bin2hex(\Smalot\PdfParser\Utils::byteString(4).\substr($input, 8, 4)));
        $result[] = \hexdec(\bin2hex(\Smalot\PdfParser\Utils::byteString(4).\substr($input, 12, 4)));

        return $result;
    }


    /**
     * Remainder modulo 3 of a byte string read as one unsigned big-endian
     * integer. Because 256 leaves remainder 1 when divided by 3, this equals
     * the sum of the bytes modulo 3, which never overflows.
     *
     * @param string $bytes non-empty byte string
     *
     * @return int 0, 1 or 2
     */
    private static function remainderMod3($bytes)
    {
        return \array_sum(\unpack('C*', $bytes)) % 3;
    }
}
diff --git a/src/Smalot/PdfParser/Encryption/Info.php b/src/Smalot/PdfParser/Encryption/Info.php new file mode 100644 index 00000000..c7e4b5f7 --- /dev/null +++ b/src/Smalot/PdfParser/Encryption/Info.php @@ -0,0 +1,296 @@ + + * + * @date 2024-01-12 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. + * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see .
+ */ + +namespace Smalot\PdfParser\Encryption; +
/**
 * Decodes the entries of a PDF encryption dictionary and the document ID,
 * validates them, and works out the encryption algorithm and file key length.
 */
class Info
{
    /**
     * @var string document ID as a byte string ('' when it could not be read)
     */
    protected $docID = '';

    /**
     * @var array decoded encryption dictionary entries, keyed by internal name
     */
    protected $metadata = [];

    /**
     * @var bool true once a supported version/revision pair has been validated
     */
    protected $ok = false;

    protected $fileKeyLength = 0;
    protected $encAlgorithm = null;
    protected $streamFilter = "";
    protected $stringFilter = "";
    protected $cfLength = 0;


    /**
     * @param array $rawMetadata one-element array holding the parsed encryption dictionary
     * @param array $idArr       document ID entries, hex encoded
     *
     * @throws SyntaxError   if the dictionary is missing, malformed or inconsistent
     * @throws Unimplemented if the version/revision pair is not supported
     */
    public function __construct(array $rawMetadata, array $idArr)
    {
        // Internal name => [ PDF dictionary key, whether the value is numeric ]
        $metadataTranslation = [
            'version' => ['V', true],
            'revision' => ['R', true],
            'length' => ['Length', true],
            'ownerKey' => ['O', false],
            'userKey' => ['U', false],
            'ownerEnc' => ['OE', false],
            'userEnc' => ['UE', false],
            'perms' => ['P', true],
        ];

        $this->metadata = ['encryptMetadata' => true];
        // $rawMetadata is an array of one value being an array representing a PDF list
        $headerArr = $rawMetadata[0] ?? null;
        if (!\is_array($headerArr) || 3 !== \count($headerArr) || '<<' !== $headerArr[0]) {
            throw new SyntaxError("Missing encryption header");
        }
        $headerDic = $headerArr[1];

        foreach ($metadataTranslation as $key => list($pdfKey, $isNumeric)) {
            if ($isNumeric) {
                $this->metadata[$key] = (int) \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($headerDic, $pdfKey, 'numeric');
                continue;
            }

            // First look for a raw string
            $val = \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($headerDic, $pdfKey, '(', false);
            if (false === $val) {
                // Then look for a hex string; anything else counts as empty
                $val = \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($headerDic, $pdfKey, '<');
                $val = \is_string($val) ? \hex2bin($val) : '';
            }
            $this->metadata[$key] = $val;
        }

        // This should be an array
        try {
            $this->docID = $this->decodeDocID($idArr);
        } catch (\TypeError $e) {
            // or a scalar value, which is a spec breach
            $this->docID = "";
        }

        if (0 === $this->metadata['version'] || 0 === $this->metadata['revision']
            || 0 === $this->metadata['perms']
            || !\is_string($this->metadata['ownerKey']) || !\is_string($this->metadata['userKey'])) {
            throw new SyntaxError("Weird encryption info");
        }
        if (!$this->hasValidKeyLengths()) {
            throw new SyntaxError("Invalid encryption key length");
        }

        $this->encAlgorithm = 'RC4';
        // revision 2 forces a 40-bit key - some buggy PDF generators
        // set the Length value incorrectly
        if (2 === $this->metadata['revision'] || 0 === $this->metadata['length']) {
            $this->fileKeyLength = 5;
        } else {
            // integer division keeps the length usable as a byte count
            $this->fileKeyLength = \intdiv($this->metadata['length'], 8);
        }
        $this->metadata['encryptMetadata'] = true;

        if (\in_array($this->metadata['version'], [4, 5], true)
            && \in_array($this->metadata['revision'], [4, 5, 6], true)) {
            $this->applyCryptFilters($headerDic);
        }

        $version = $this->metadata['version'];
        $revision = $this->metadata['revision'];
        if ($version >= 1 && $version <= 2 && $revision >= 2 && $revision <= 3) {
            if ($this->fileKeyLength > 16 || $this->fileKeyLength < 0) {
                $this->fileKeyLength = 16;
            }
            $this->ok = true;
        } elseif (5 === $version && (5 === $revision || 6 === $revision)) {
            if (!\is_string($this->metadata['ownerEnc']) || !\is_string($this->metadata['userEnc'])) {
                throw new SyntaxError("Weird encryption owner/user info");
            }
            if ($this->fileKeyLength > 32 || $this->fileKeyLength < 0) {
                $this->fileKeyLength = 32;
            }
            $this->ok = true;
        } elseif (!(-1 === $version && -1 === $revision)) {
            // -1/-1 marks the Identity crypt filter (nothing is encrypted)
            throw new Unimplemented(\sprintf(
                "Unsupported version/revision (%d/%d) of Standard security handler",
                $version,
                $revision
            ));
        }
    }


    /**
     * Check the owner/user key lengths (and, for revisions 5 and 6, the
     * OE/UE entries) against what the revision requires.
     *
     * @return bool
     */
    private function hasValidKeyLengths()
    {
        $revision = $this->metadata['revision'];
        $ownerKeyLength = \strlen($this->metadata['ownerKey']);
        $userKeyLength = \strlen($this->metadata['userKey']);

        if ($revision <= 4) {
            return 32 === $ownerKeyLength && 32 === $userKeyLength;
        }
        if (5 !== $revision && 6 !== $revision) {
            return false;
        }

        // the spec says 48 bytes, but Acrobat pads them out longer
        return $ownerKeyLength >= 48 && $userKeyLength >= 48
            && \is_string($this->metadata['ownerEnc']) && 32 === \strlen($this->metadata['ownerEnc'])
            && \is_string($this->metadata['userEnc']) && 32 === \strlen($this->metadata['userEnc']);
    }


    /**
     * Read the crypt filter entries (CF, StmF, StrF, EncryptMetadata) and
     * adjust version, revision, algorithm and key length to match.
     *
     * This currently only handles a subset of crypt filter functionality (in
     * particular, it ignores the EFF entry in $headerDic, and doesn't handle
     * the case where StmF, StrF, and EFF are not all the same).
     *
     * @param mixed $headerDic parsed encryption dictionary
     *
     * @throws SyntaxError if the crypt filter method is unknown
     */
    private function applyCryptFilters($headerDic)
    {
        $cryptFiltersDic = \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($headerDic, 'CF', '<<');
        $streamFilter = \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($headerDic, 'StmF', '/');
        $stringFilter = \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($headerDic, 'StrF', '/');
        $this->metadata['streamFilter'] = $streamFilter;
        $this->metadata['stringFilter'] = $stringFilter;

        if (!empty($cryptFiltersDic) && \is_string($streamFilter) && \is_string($stringFilter)
            && $streamFilter === $stringFilter) {
            if ('Identity' === $streamFilter) {
                // no encryption on streams or strings
                $this->metadata['version'] = $this->metadata['revision'] = -1;
            } else {
                // Find required crypt filter and its crypt filter method
                // and update metadata accordingly
                $cryptFilterInfoDic = \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($cryptFiltersDic, $streamFilter, '<<');
                $method = \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($cryptFilterInfoDic, 'CFM', '/');
                switch ($method) {
                    case 'V2':
                        $this->metadata['version'] = 2;
                        $this->metadata['revision'] = 3;
                        break;

                    case 'AESV2':
                        $this->metadata['version'] = 2;
                        $this->metadata['revision'] = 3;
                        $this->encAlgorithm = 'AES';
                        break;

                    case 'AESV3':
                        $this->metadata['version'] = 5;
                        // let $this->metadata['revision'] be 5 or 6
                        $this->encAlgorithm = 'AES256';
                        break;

                    default:
                        throw new SyntaxError("Unknown CFM '$method'");
                }

                $this->metadata['cfLength'] = (int) \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($cryptFilterInfoDic, 'Length', 'numeric', -1);
                if (0 !== $this->metadata['cfLength']) {
                    //~ according to the spec, this should be cfLength / 8
                    $this->fileKeyLength = $this->metadata['cfLength'];
                }
            }
        }

        // Metadata is encrypted unless the dictionary explicitly says otherwise.
        // NOTE(review): assumes getHeaderValue() yields the string "false" for an
        // explicit false entry - confirm against RawDataParser.
        $encryptMetadata = \Smalot\PdfParser\RawData\RawDataParser::getHeaderValue($headerDic, 'EncryptMetadata', 'boolean');
        $this->metadata['encryptMetadata'] = ('false' !== $encryptMetadata);
    }


    /**
     * Get an element from the array of IDs and convert it from hex.
     *
     * @param array $idArr document ID entries, hex encoded
     *
     * @return string a binary string ('' when there is no usable first element)
     *
     * @throws SyntaxError if the first element is not valid hex
     */
    protected function decodeDocID(array $idArr)
    {
        // If multiple elements, assume that the first one is correct
        if (!isset($idArr[0]) || !\is_string($idArr[0])) {
            return '';
        }

        $result = \hex2bin($idArr[0]);
        if (false === $result) {
            throw new SyntaxError("Can't decode DocID");
        }

        return $result;
    }


    public function getVersion()
    {
        return $this->metadata['version'];
    }


    public function getRevision()
    {
        return $this->metadata['revision'];
    }


    public function getLength()
    {
        return $this->metadata['length'];
    }


    public function getOwnerKey()
    {
        return $this->metadata['ownerKey'];
    }


    public function getUserKey()
    {
        return $this->metadata['userKey'];
    }


    public function getOwnerEnc()
    {
        return $this->metadata['ownerEnc'];
    }


    public function getUserEnc()
    {
        return $this->metadata['userEnc'];
    }


    public function getPerms()
    {
        return $this->metadata['perms'];
    }


    public function getEncryptMetadata()
    {
        return $this->metadata['encryptMetadata'];
    }


    public function getDocID()
    {
        return $this->docID;
    }


    public function getEncAlgorithm()
    {
        return $this->encAlgorithm;
    }


    public function getFileKeyLength()
    {
        return $this->fileKeyLength;
    }
}
diff --git a/src/Smalot/PdfParser/Encryption/InvalidPassword.php b/src/Smalot/PdfParser/Encryption/InvalidPassword.php new file mode 100644 index 00000000..7f9c2d34 --- /dev/null +++ b/src/Smalot/PdfParser/Encryption/InvalidPassword.php @@ -0,0 +1,49 @@ + + * + * @date 2024-01-12 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. + * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see . + */ + +namespace Smalot\PdfParser\Encryption; + +/** + * Thrown by the OldFileKey and NewFileKey constructors when neither the + * owner password nor the user password validates against the file. + */ +class InvalidPassword extends \RuntimeException +{ +} diff --git a/src/Smalot/PdfParser/Encryption/InvalidRevision.php b/src/Smalot/PdfParser/Encryption/InvalidRevision.php new file mode 100644 index 00000000..0be36318 --- /dev/null +++ b/src/Smalot/PdfParser/Encryption/InvalidRevision.php @@ -0,0 +1,49 @@ + + * + * @date 2024-01-12 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. + * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see . 
+ */ + +namespace Smalot\PdfParser\Encryption; + +/** + * Thrown when code is asked to handle an encryption revision it does not + * support (see FileKey::generate() and the file key helpers). + */ +class InvalidRevision extends \RuntimeException +{ +} diff --git a/src/Smalot/PdfParser/Encryption/Stream.php b/src/Smalot/PdfParser/Encryption/Stream.php new file mode 100644 index 00000000..17cb0658 --- /dev/null +++ b/src/Smalot/PdfParser/Encryption/Stream.php @@ -0,0 +1,187 @@ + + * + * @date 2024-01-12 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. + * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see . + */ + +namespace Smalot\PdfParser\Encryption; +
/**
 * Base class of the per-algorithm decryption helpers. Holds the file key and
 * offers the factory (make()) that picks the subclass for an algorithm name.
 */
abstract class Stream
{
    /**
     * Declared explicitly: creating properties dynamically is deprecated
     * since PHP 8.2.
     *
     * @var string file key as a byte string
     */
    protected $key;

    /**
     * @param string $key file key as a byte string
     */
    public function __construct($key)
    {
        $this->key = $key;
    }


    /**
     * Decrypt the data belonging to one PDF object.
     *
     * @param string $block encrypted data
     * @param int    $num   object number
     * @param int    $gen   generation number
     *
     * @return string|false decrypted data, or false if OpenSSL fails
     */
    abstract public function decrypt(string $block, int $num, int $gen);


    /**
     * Carve the IV and the cyphertext apart. Only used by some algorithms.
     *
     * @param string $block    IV immediately followed by the cyphertext
     * @param int    $ivLength number of leading bytes that form the IV
     *
     * @return array of two strings: [ IV, cyphertext ]
     */
    public static function splitBlock(string $block, int $ivLength)
    {
        return [\substr($block, 0, $ivLength), \substr($block, $ivLength)];
    }


    /**
     * Object factory that instantiates the relevant subclass.
     *
     * @param string $algorithm 'RC4', 'AES' or 'AES256'
     * @param string $key       file key consisting of byte string of the relevant number of characters
     *
     * @return Stream subclass
     *
     * @throws InvalidAlgorithm if $algorithm is invalid
     */
    public static function make(string $algorithm, string $key)
    {
        switch ($algorithm) {
            case 'RC4':
                return new RC4Stream($key);

            case 'AES':
                return new AES128Stream($key);

            case 'AES256':
                return new AES256Stream($key);

            default:
                // NOTE(review): no InvalidAlgorithm class is visible among the
                // exception classes added next to this file - confirm it exists.
                throw new InvalidAlgorithm("Unsupported encryption algorithm");
        }
    }


    /**
     * Low 3 bytes of the object number followed by the low 2 bytes of the
     * generation number, least significant byte first.
     *
     * @return string 5 byte string
     */
    protected static function objectSalt(int $num, int $gen)
    {
        return \substr(\pack('V', $num), 0, 3).\substr(\pack('v', $gen), 0, 2);
    }


    /**
     * Remove PKCS#5 padding from AES plaintext. Data that does not end in a
     * well-formed padding run is returned untouched, so malformed files keep
     * decoding the way they did before.
     *
     * @param string|false $plaintext result of openssl_decrypt()
     *
     * @return string|false
     */
    protected static function stripPkcsPadding($plaintext)
    {
        if (!\is_string($plaintext) || '' === $plaintext) {
            return $plaintext;
        }

        $length = \strlen($plaintext);
        $pad = \ord($plaintext[$length - 1]);
        if ($pad < 1 || $pad > 16 || $pad > $length) {
            return $plaintext;
        }
        if (\substr($plaintext, -$pad) !== \str_repeat(\chr($pad), $pad)) {
            return $plaintext;
        }

        return (string) \substr($plaintext, 0, $length - $pad);
    }
}


/**
 * Decrypts object data with RC4.
 */
class RC4Stream extends Stream
{
    public function decrypt(string $cyphertext, int $num, int $gen)
    {
        // 32 bytes minus 5 bytes of salting
        $key = \strlen($this->key) <= 27 ? $this->makeObjectKey($num, $gen) : $this->key;

        // NOTE(review): "RC4-40" with keys longer than 5 bytes relies on PHP's
        // OpenSSL binding extending the key length to the supplied key - confirm.
        return \openssl_decrypt($cyphertext, 'RC4-40', $key, \OPENSSL_RAW_DATA | \OPENSSL_ZERO_PADDING);
    }


    /**
     * Make a tweaked key that is based on info about the object: MD5 of the
     * file key plus the object salt, cut to (key length + 5) bytes, at most 16.
     *
     * @return string byte string
     */
    public function makeObjectKey(int $num, int $gen)
    {
        $hash = \md5($this->key.self::objectSalt($num, $gen), true);

        return \substr($hash, 0, \min(\strlen($this->key) + 5, 16));
    }
}


/**
 * Decrypts object data with AES-128 in CBC mode; the IV is the first 16
 * bytes of each block of data.
 */
class AES128Stream extends Stream
{
    public function decrypt(string $block, int $num, int $gen)
    {
        // 32 bytes minus 9 bytes of salting
        $key = \strlen($this->key) <= 23 ? $this->makeObjectKey($num, $gen) : $this->key;

        list($iv, $cyphertext) = self::splitBlock($block, 16);
        $plaintext = \openssl_decrypt(
            $cyphertext,
            'aes-128-cbc',
            $key,
            \OPENSSL_RAW_DATA | \OPENSSL_ZERO_PADDING,
            $iv
        );

        return self::stripPkcsPadding($plaintext);
    }


    /**
     * Make a tweaked key that is based on info about the object: MD5 of the
     * file key, the object salt and the literal "sAlT".
     *
     * @return string 16 byte string
     */
    public function makeObjectKey(int $num, int $gen)
    {
        return \md5($this->key.self::objectSalt($num, $gen)."sAlT", true);
    }
}


/**
 * Decrypts object data with AES-256 in CBC mode; the IV is the first 16
 * bytes of each block of data and the file key is used unchanged.
 */
class AES256Stream extends Stream
{
    public function decrypt(string $block, int $num, int $gen)
    {
        $key = $this->makeObjectKey($num, $gen);
        list($iv, $cyphertext) = self::splitBlock($block, 16);
        $plaintext = \openssl_decrypt(
            $cyphertext,
            'aes-256-cbc',
            $key,
            \OPENSSL_RAW_DATA | \OPENSSL_ZERO_PADDING,
            $iv
        );

        return self::stripPkcsPadding($plaintext);
    }


    /**
     * The object number and generation number are not used: every object is
     * decrypted with the file key itself.
     *
     * @return string the file key
     */
    public function makeObjectKey(int $num, int $gen)
    {
        return $this->key;
    }
}
diff --git a/src/Smalot/PdfParser/Encryption/SyntaxError.php b/src/Smalot/PdfParser/Encryption/SyntaxError.php new file mode 100644 index 00000000..a7601e56 --- /dev/null +++ b/src/Smalot/PdfParser/Encryption/SyntaxError.php @@ -0,0 +1,49 @@ + + * + * @date 2024-01-12 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. + * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see .
+ */ + +namespace Smalot\PdfParser\Encryption; + +/** + * Thrown by Info when the encryption dictionary or the document ID is + * missing, malformed or inconsistent. + */ +class SyntaxError extends \RuntimeException +{ +} diff --git a/src/Smalot/PdfParser/Encryption/Unimplemented.php b/src/Smalot/PdfParser/Encryption/Unimplemented.php new file mode 100644 index 00000000..f8b238f9 --- /dev/null +++ b/src/Smalot/PdfParser/Encryption/Unimplemented.php @@ -0,0 +1,49 @@ + + * + * @date 2024-01-12 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. + * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see . 
+ */ + +namespace Smalot\PdfParser\Encryption; +
+/**
+ * Exception class of the Encryption namespace. It adds no members to
+ * \RuntimeException, so only its class name distinguishes it.
+ *
+ * NOTE(review): presumably signals an encryption feature that is not
+ * supported; the throw sites are not visible here - confirm.
+ */
+class Unimplemented extends \RuntimeException
+{
+}
diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index af1dbc83..7371fcf3 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -63,6 +63,14 @@ class RawDataParser protected $filterHelper; protected $objects; + /** + * @var \Smalot\PdfParser\Encryption\Info or null + */ + protected $encryptionInfo = null; + /** + * @var \Smalot\PdfParser\Encryption\Stream or null + */ + protected $decryptionHelper = null; /** * @param array $cfg Configuration array, default is [] @@ -122,6 +130,15 @@ protected function decodeStream(string $pdfData, array $xref, array $sdic, strin } } + if (!is_null($this->decryptionHelper)) { + if (!is_null($objRefArr)) { + list($num, $gen) = $objRefArr; + $stream = $this->decryptionHelper->decrypt($stream, $num, $gen); + } else { + throw new \Exception('Logic error: $objRefArr not passed to decodeStream()'); + } + } + // decode the stream $remaining_filters = []; foreach ($filters as $filter) { @@ -966,6 +983,50 @@ protected function getXrefData(string $pdfData, int $offset = 0, array $xref = [ return $xref; }
+    /**
+     * Prepares stream decryption for an encrypted document.
+     *
+     * Parses the encryption object, derives the file key from it and stores
+     * both the parsed info ($this->encryptionInfo) and the matching stream
+     * decryptor ($this->decryptionHelper) on this parser. decodeStream()
+     * decrypts every stream once the helper is set.
+     *
+     * No owner or user password is handed to FileKey::generate(), so its
+     * defaults (null for both) apply.
+     *
+     * @param string $pdfData    PDF data
+     * @param array  $xref       xref array
+     * @param string $encryptRef Object number and generation number separated by underscore character
+     *
+     * @throws \Exception if invalid object reference found
+     */
+    protected function setupDecryption(string $pdfData, array $xref, string $encryptRef)
+    {
+        $this->encryptionInfo = $this->parseEncryptionInfo($pdfData, $xref, $encryptRef);
+
+        // TODO: forward $ownerPassword / $userPassword once the parser can receive them.
+        $fileKey = \Smalot\PdfParser\Encryption\FileKey::generate($this->encryptionInfo);
+
+        $this->decryptionHelper = \Smalot\PdfParser\Encryption\Stream::make(
+            $this->encryptionInfo->getEncAlgorithm(),
+            $fileKey
+        );
+    }
+
+    /**
+     * Get content of encryption metadata.
+     *
+     * Reads the indirect object named by $objRef and wraps it, together with
+     * the trailer's file ID array (empty array when the trailer has none), in
+     * an Encryption\Info instance.
+     *
+     * @param string $pdfData PDF data
+     * @param array  $xref    xref array
+     * @param string $objRef  Object number and generation number separated by underscore character
+     *
+     * @return \Smalot\PdfParser\Encryption\Info
+     *
+     * @throws \Exception if invalid object reference found
+     */
+    protected function parseEncryptionInfo(string $pdfData, array $xref, string $objRef): \Smalot\PdfParser\Encryption\Info
+    {
+        // Fail with a clear message instead of an undefined-index notice and
+        // a null offset when the reference is missing from the xref table.
+        if (!isset($xref['xref'][$objRef])) {
+            throw new \Exception('Invalid object reference for encryption dictionary: '.$objRef);
+        }
+
+        $fileIdArr = $xref['trailer']['id'] ?? [];
+        $offset = $xref['xref'][$objRef];
+        $encryptArr = $this->getIndirectObject($pdfData, $xref, $objRef, $offset, true);
+
+        return new \Smalot\PdfParser\Encryption\Info($encryptArr, $fileIdArr);
+    }
+
 /** * Parses PDF data and returns extracted data as array. * @@ -998,6 +1059,14 @@ public function parseData(string $data): array $xref = $this->getXrefData($pdfData); } + // Pre-parse encryption object + if (isset($xref['trailer']['encrypt'])) { + $encryptRef = $xref['trailer']['encrypt']; + if (isset($xref['xref'][$encryptRef])) { + $this->setupDecryption($pdfData, $xref, $encryptRef); + } + } + // parse all document objects $objects = []; foreach ($xref['xref'] as $obj => $offset) { diff --git a/src/Smalot/PdfParser/Utils.php b/src/Smalot/PdfParser/Utils.php new file mode 100644 index 00000000..bf9ec071 --- /dev/null +++ b/src/Smalot/PdfParser/Utils.php @@ -0,0 +1,81 @@ + + * + * @date 2024-01-12 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. + * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see . + */ + +namespace Smalot\PdfParser; +
+/**
+ * Static helpers for building raw byte strings.
+ */
+class Utils
+{
+    /**
+     * Convert an integer to a string of $numBytes bytes, LSB first.
+     *
+     * Only the lowest $numBytes bytes of $n are emitted. Once $n has been
+     * shifted out completely the remaining bytes are 0x00 for non-negative
+     * values (and 0xFF for negative ones, because >> keeps the sign).
+     *
+     * @param int $n        value to encode
+     * @param int $numBytes number of bytes to produce; 0 or less yields ""
+     *
+     * @return string byte string representing a little-endian integer
+     */
+    public static function lowestBytesStr($n, $numBytes): string
+    {
+        $result = "";
+        for ($i = 0; $i < $numBytes; ++$i) {
+            $result .= \chr($n & 0xFF);
+            $n >>= 8;
+        }
+
+        return $result;
+    }
+
+    /**
+     * Create a byte string of a given length.
+     *
+     * @param int $numBytes length of the result in bytes
+     * @param int $byte     the byte to use for each character, default NUL
+     *
+     * @return string $numBytes repetitions of the byte $byte
+     */
+    public static function byteString(int $numBytes, int $byte = 0): string
+    {
+        return \str_repeat(\chr($byte), $numBytes);
+    }
+}
+