Skip to content

Commit

Permalink
[RELEASE] iText pdfOCR 4.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
iText-CI committed Oct 11, 2024
2 parents 7657d96 + 0fc204d commit 3167bc6
Show file tree
Hide file tree
Showing 57 changed files with 424 additions and 184 deletions.
40 changes: 40 additions & 0 deletions SECURITY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# iText Security Policy

## Reporting a Vulnerability

We are committed to maintaining the security of our software. If you discover a security vulnerability, we encourage you to report it to us as soon as possible.

To report a vulnerability, please visit our [Vulnerability Reporting Page](https://itextpdf.com/report-vulnerability), or email [vulnerability@apryse.com](mailto:vulnerability@apryse.com). If you do not receive a response within 2 business days, please follow up, as we may not have received your message.

We follow the procedure of Coordinated Vulnerability Disclosure (CVD) and, to protect the ecosystem, we request that those reporting do the same. Please visit the above page for more information, and follow the steps below to ensure that your report is handled promptly and appropriately:

1. **Do not disclose the vulnerability publicly** until we have had a chance to address it.
2. **Provide a detailed description** of the vulnerability, including steps to reproduce it, if possible.
3. **Include any relevant information** such as the version of pdfOCR you are using, your operating system, and any other pertinent details.

## Security Updates and Patches

When a vulnerability is reported, we will:

1. **Investigate and verify** the vulnerability.
2. **Develop and test** a fix for the vulnerability.
3. **Release a patch** as soon as possible.


## Known Vulnerabilities

The iText Knowledge Base has a page for known [Common Vulnerabilities and Exposures](https://kb.itextpdf.com/itext/cves) (CVEs). Please check it to ensure your vulnerability has not already been disclosed or addressed.

## Supported product lines

See the [Compatibility Matrix](https://kb.itextpdf.com/itext/compatibility-matrix).

## Security Best Practices

To help ensure the security of your applications using pdfOCR, we recommend the following best practices:

1. **Keep pdfOCR up to date** by regularly checking for and applying updates.
2. **Review and follow** our security guidelines for secure usage.
3. **Monitor your applications** for any unusual activity and investigate any anomalies promptly.

Thank you for helping us keep iText secure!
4 changes: 2 additions & 2 deletions doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.

PROJECT_NAME = "pdfOCR 3.0.2 API"
PROJECT_NAME = "pdfOCR 4.0.0 API"

# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
Expand Down Expand Up @@ -485,7 +485,7 @@ EXTRACT_PRIV_VIRTUAL = NO
# scope will be included in the documentation.
# The default value is: NO.

EXTRACT_PACKAGE = NO
EXTRACT_PACKAGE = YES

# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
# included in the documentation.
Expand Down
6 changes: 3 additions & 3 deletions itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@

[assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")]

[assembly: AssemblyVersion("3.0.2.0")]
[assembly: AssemblyFileVersion("3.0.2.0")]
[assembly: AssemblyInformationalVersion("3.0.2")]
[assembly: AssemblyVersion("4.0.0.0")]
[assembly: AssemblyFileVersion("4.0.0.0")]
[assembly: AssemblyInformationalVersion("4.0.0")]
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.csproj" Condition="Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.csproj')" />
<PackageReference Include="itext.pdftest" Version="8.0.3" Condition="!Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.csproj')" />
<PackageReference Include="itext.pdftest" Version="9.0.0" Condition="!Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.csproj')" />
<ProjectReference Include="..\..\..\itextcore\itext\itext.bouncy-castle-adapter\itext.bouncy-castle-adapter.csproj" Condition="Exists('..\..\..\itextcore\itext\itext.bouncy-castle-adapter\itext.bouncy-castle-adapter.csproj')" />
<PackageReference Include="itext.bouncy-castle-adapter" Version="8.0.3" Condition="!Exists('..\..\..\itextcore\itext\itext.bouncy-castle-adapter\itext.bouncy-castle-adapter.csproj')" />
<PackageReference Include="itext.bouncy-castle-adapter" Version="9.0.0" Condition="!Exists('..\..\..\itextcore\itext\itext.bouncy-castle-adapter\itext.bouncy-castle-adapter.csproj')" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.6.0" />
<PackageReference Include="NUnit" Version="3.12.0" />
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1">
Expand Down
30 changes: 15 additions & 15 deletions itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ public virtual void CreatePdfAFileWithFileTest() {
String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding
.UTF8);
NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
PdfAConformance cl = pdf.GetReader().GetPdfConformance().GetAConformance();
NUnit.Framework.Assert.AreEqual(PdfAConformance.PDF_A_3U.GetLevel(), cl.GetLevel());
NUnit.Framework.Assert.AreEqual(PdfAConformance.PDF_A_3U.GetPart(), cl.GetPart());
}
}

Expand All @@ -120,9 +120,9 @@ public virtual void CreatePdfAFileWithFileNoMetaTest() {
String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding
.UTF8);
NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
PdfAConformance cl = pdf.GetReader().GetPdfConformance().GetAConformance();
NUnit.Framework.Assert.AreEqual(PdfAConformance.PDF_A_3U.GetLevel(), cl.GetLevel());
NUnit.Framework.Assert.AreEqual(PdfAConformance.PDF_A_3U.GetPart(), cl.GetPart());
}
}

Expand Down Expand Up @@ -167,7 +167,7 @@ public virtual void TestThaiImageWithNotDefGlyphs() {

[NUnit.Framework.Test]
public virtual void TestImageRotationHandler() {
NUnit.Framework.Assert.That(() => {
Exception exception = NUnit.Framework.Assert.Catch(typeof(Exception), () => {
OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
properties.SetImageRotationHandler(new ApiTest.NotImplementedImageRotationHandler());
String testName = "testSetAndGetImageRotationHandler";
Expand All @@ -176,13 +176,13 @@ public virtual void TestImageRotationHandler() {
PdfHelper.CreatePdf(pdfPath, new FileInfo(path), properties);
NUnit.Framework.Assert.IsNotNull(properties.GetImageRotationHandler());
}
, NUnit.Framework.Throws.InstanceOf<Exception>().With.Message.EqualTo("applyRotation is not implemented"))
;
);
NUnit.Framework.Assert.AreEqual("applyRotation is not implemented", exception.Message);
}

[NUnit.Framework.Test]
public virtual void TestImageRotationHandlerForTiff() {
NUnit.Framework.Assert.That(() => {
Exception exception = NUnit.Framework.Assert.Catch(typeof(Exception), () => {
OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
properties.SetImageRotationHandler(new ApiTest.NotImplementedImageRotationHandler());
String testName = "testSetAndGetImageRotationHandler";
Expand All @@ -191,8 +191,8 @@ public virtual void TestImageRotationHandlerForTiff() {
PdfHelper.CreatePdf(pdfPath, new FileInfo(path), properties);
NUnit.Framework.Assert.IsNotNull(properties.GetImageRotationHandler());
}
, NUnit.Framework.Throws.InstanceOf<Exception>().With.Message.EqualTo("applyRotation is not implemented"))
;
);
NUnit.Framework.Assert.AreEqual("applyRotation is not implemented", exception.Message);
}

[NUnit.Framework.Test]
Expand All @@ -215,21 +215,21 @@ public virtual void TestTableStructureTree() {
}

[NUnit.Framework.Test]
[LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, LogLevel = LogLevelConstants.ERROR)]
public virtual void TestTaggingNotSupported() {
String input = PdfHelper.GetImagesTestDirectory() + "numbers_01.jpg";
String pdfPath = PdfHelper.GetTargetDirectory() + "taggingNotSupported.pdf";
Exception e = NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => PdfHelper.CreatePdf(pdfPath, new
FileInfo(input), new OcrPdfCreatorProperties().SetTagged(true)));
NUnit.Framework.Assert.AreEqual(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT
, PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED), e.Message);
NUnit.Framework.Assert.AreEqual(PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED, e.Message);
}

//\cond DO_NOT_DOCUMENT
/// <summary>
/// Test stub of <see cref="IImageRotationHandler"/> whose single operation always fails.
/// </summary>
/// <remarks>
/// The rotation-handler tests in this file compare the thrown exception's message
/// against the literal used below, so the text must stay unchanged.
/// </remarks>
internal class NotImplementedImageRotationHandler : IImageRotationHandler {
    /// <summary>Unconditionally throws; no rotation is ever applied.</summary>
    /// <param name="imageData">image data supplied by the caller; never read</param>
    /// <returns>nothing, this method never returns normally</returns>
    public virtual ImageData ApplyRotation(ImageData imageData)
        => throw new Exception("applyRotation is not implemented");
}
//\endcond

/// <summary>
/// Empty <see cref="IMetaInfo"/> implementation; it declares no members of its own.
/// </summary>
private class DummyMetaInfo : IMetaInfo {
}
Expand Down
26 changes: 15 additions & 11 deletions itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public virtual void TestPdfA3uWithNullIntent() {

[NUnit.Framework.Test]
public virtual void TestIncompatibleOutputIntentAndFontColorSpaceException() {
NUnit.Framework.Assert.That(() => {
Exception exception = NUnit.Framework.Assert.Catch(typeof(PdfException), () => {
String testName = "testIncompatibleOutputIntentAndFontColorSpaceException";
String path = PdfHelper.GetDefaultImagePath();
String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
Expand All @@ -64,8 +64,9 @@ public virtual void TestIncompatibleOutputIntentAndFontColorSpaceException() {
PdfHelper.CreatePdfA(pdfPath, new FileInfo(path), ocrPdfCreatorProperties, PdfHelper.GetRGBPdfOutputIntent
());
}
, NUnit.Framework.Throws.InstanceOf<PdfException>().With.Message.EqualTo(PdfAConformanceException.DEVICECMYK_MAY_BE_USED_ONLY_IF_THE_FILE_HAS_A_CMYK_PDFA_OUTPUT_INTENT_OR_DEFAULTCMYK_IN_USAGE_CONTEXT))
;
);
NUnit.Framework.Assert.AreEqual(PdfaExceptionMessageConstant.DEVICECMYK_MAY_BE_USED_ONLY_IF_THE_FILE_HAS_A_CMYK_PDFA_OUTPUT_INTENT_OR_DEFAULTCMYK_IN_USAGE_CONTEXT
, exception.Message);
}

[NUnit.Framework.Test]
Expand All @@ -81,7 +82,7 @@ public virtual void TestPdfA3DefaultMetadata() {
PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
NUnit.Framework.Assert.AreEqual("en-US", pdfDocument.GetCatalog().GetLang().ToString());
NUnit.Framework.Assert.AreEqual(null, pdfDocument.GetDocumentInfo().GetTitle());
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U, pdfDocument.GetReader().GetPdfAConformanceLevel
NUnit.Framework.Assert.AreEqual(PdfAConformance.PDF_A_3U, pdfDocument.GetReader().GetPdfConformance().GetAConformance
());
pdfDocument.Close();
}
Expand All @@ -102,15 +103,15 @@ public virtual void TestPdfCustomMetadata() {
PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
NUnit.Framework.Assert.AreEqual(locale, pdfDocument.GetCatalog().GetLang().ToString());
NUnit.Framework.Assert.AreEqual(title, pdfDocument.GetDocumentInfo().GetTitle());
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U, pdfDocument.GetReader().GetPdfAConformanceLevel
NUnit.Framework.Assert.AreEqual(PdfAConformance.PDF_A_3U, pdfDocument.GetReader().GetPdfConformance().GetAConformance
());
pdfDocument.Close();
}

[LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestNonCompliantThaiPdfA() {
NUnit.Framework.Assert.That(() => {
Exception exception = NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => {
String testName = "testNonCompliantThaiPdfA";
String path = PdfHelper.GetThaiImagePath();
String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
Expand All @@ -120,8 +121,10 @@ public virtual void TestNonCompliantThaiPdfA() {
PdfHelper.CreatePdfA(pdfPath, new FileInfo(path), ocrPdfCreatorProperties, PdfHelper.GetRGBPdfOutputIntent
());
}
, NUnit.Framework.Throws.InstanceOf<PdfOcrException>().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, MessageFormatUtil.Format(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, 3611))))
;
);
NUnit.Framework.Assert.AreEqual(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT
, MessageFormatUtil.Format(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER
, 3611)), exception.Message);
}

[NUnit.Framework.Test]
Expand Down Expand Up @@ -153,15 +156,16 @@ public virtual void TestCompliantThaiPdfA() {
[LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestPdfACreateWithoutPdfLangProperty() {
NUnit.Framework.Assert.That(() => {
Exception exception = NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => {
String testName = "testPdfACreateWithoutPdfLangProperty";
String path = PdfHelper.GetThaiImagePath();
String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
PdfHelper.CreatePdfA(pdfPath, new FileInfo(path), new OcrPdfCreatorProperties(), PdfHelper.GetRGBPdfOutputIntent
());
}
, NUnit.Framework.Throws.InstanceOf<PdfOcrException>().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET)))
;
);
NUnit.Framework.Assert.AreEqual(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT
, PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET), exception.Message);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
using System;
using System.Collections.Generic;
using System.IO;
using iText.Commons.Utils;
Expand Down Expand Up @@ -70,21 +71,17 @@ public virtual void GetImageDataFromValidNotTiffTest() {
[NUnit.Framework.Test]
[LogMessage(PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
public virtual void GetImageDataFromNotExistingImageTest() {
NUnit.Framework.Assert.That(() => {
PdfCreatorUtil.GetImageData(new FileInfo("no such path"), null);
}
, NUnit.Framework.Throws.InstanceOf<PdfOcrInputException>())
;
NUnit.Framework.Assert.Catch(typeof(PdfOcrInputException), () => PdfCreatorUtil.GetImageData(new FileInfo(
"no such path"), null));
}

[NUnit.Framework.Test]
[LogMessage(PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
public virtual void GetImageDataFromInvalidImageTest() {
NUnit.Framework.Assert.That(() => {
PdfCreatorUtil.GetImageData(new FileInfo(PdfHelper.GetImagesTestDirectory() + "corrupted.jpg"), null);
}
, NUnit.Framework.Throws.InstanceOf<PdfOcrInputException>().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE)))
;
Exception exception = NUnit.Framework.Assert.Catch(typeof(PdfOcrInputException), () => PdfCreatorUtil.GetImageData
(new FileInfo(PdfHelper.GetImagesTestDirectory() + "corrupted.jpg"), null));
NUnit.Framework.Assert.AreEqual(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE
), exception.Message);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public virtual void TestFontColor() {
[LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
[NUnit.Framework.Test]
public virtual void TestInvalidFontWithInvalidDefaultFontFamily() {
NUnit.Framework.Assert.That(() => {
Exception exception = NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => {
String testName = "testInvalidFontWithInvalidDefaultFontFamily";
String path = PdfHelper.GetDefaultImagePath();
String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
Expand All @@ -72,8 +72,9 @@ public virtual void TestInvalidFontWithInvalidDefaultFontFamily() {
NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, result);
NUnit.Framework.Assert.AreEqual(ScaleMode.SCALE_TO_FIT, properties.GetScaleMode());
}
, NUnit.Framework.Throws.InstanceOf<PdfOcrException>().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrExceptionMessageConstant.CANNOT_RESOLVE_PROVIDED_FONTS)))
;
);
NUnit.Framework.Assert.AreEqual(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT
, PdfOcrExceptionMessageConstant.CANNOT_RESOLVE_PROVIDED_FONTS), exception.Message);
}

[NUnit.Framework.Test]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile,
) {
}

/// <summary>Tagging-support flag for this implementation.</summary>
/// <returns>always <c>false</c></returns>
public virtual bool IsTaggingSupported() => false;

/// <summary>Gets the engine properties currently held in <c>ocrEngineProperties</c>.</summary>
/// <returns>the current <c>ocrEngineProperties</c> value, returned as-is</returns>
public virtual OcrEngineProperties GetOcrEngineProperties() => ocrEngineProperties;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile,
) {
}

/// <summary>Tagging-support flag for this implementation.</summary>
/// <returns>always <c>true</c></returns>
public virtual bool IsTaggingSupported() => true;

/// <summary>This implementation exposes no engine properties.</summary>
/// <returns>always <c>null</c></returns>
public virtual OcrEngineProperties GetOcrEngineProperties() => null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ public static String GetTargetDirectory() {

/// <summary>Create pdfWriter using provided path to destination file.</summary>
public static PdfWriter GetPdfWriter(String pdfPath) {
return new PdfWriter(pdfPath, new WriterProperties().AddUAXmpMetadata());
return new PdfWriter(pdfPath, new WriterProperties().AddPdfUaXmpMetadata(PdfUAConformance.PDF_UA_1));
}

/// <summary>Create pdfWriter.</summary>
public static PdfWriter GetPdfWriter() {
return new PdfWriter(new MemoryStream(), new WriterProperties().AddUAXmpMetadata());
return new PdfWriter(new MemoryStream(), new WriterProperties().AddPdfUaXmpMetadata(PdfUAConformance.PDF_UA_1
));
}

/// <summary>Creates PDF rgb output intent for tests.</summary>
Expand Down
Loading

0 comments on commit 3167bc6

Please sign in to comment.