diff --git a/doxyfile b/doxyfile
index 51d9aa5..06dcdbe 100644
--- a/doxyfile
+++ b/doxyfile
@@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.
-PROJECT_NAME = "pdfOCR 3.0.1 API"
+PROJECT_NAME = "pdfOCR 3.0.2 API"
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
diff --git a/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs b/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs
index 591395c..9628aa2 100644
--- a/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs
+++ b/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs
@@ -7,7 +7,7 @@
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("Apryse Group NV")]
[assembly: AssemblyProduct("iText")]
-[assembly: AssemblyCopyright ("Copyright (c) 1998-2023 Apryse Group NV")]
+[assembly: AssemblyCopyright("Copyright (c) 1998-2024 Apryse Group NV")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
@@ -15,6 +15,6 @@
[assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")]
-[assembly: AssemblyVersion("3.0.1.0")]
-[assembly: AssemblyFileVersion("3.0.1.0")]
-[assembly: AssemblyInformationalVersion("3.0.1")]
+[assembly: AssemblyVersion("3.0.2.0")]
+[assembly: AssemblyFileVersion("3.0.2.0")]
+[assembly: AssemblyInformationalVersion("3.0.2")]
diff --git a/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj b/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj
index 0b5e41e..d4f8850 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj
+++ b/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj
@@ -25,9 +25,9 @@
-
+
-
+
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs
index e0bb646..f66ea7d 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -30,7 +30,9 @@ You should have received a copy of the GNU Affero General Public License
using iText.Kernel.Font;
using iText.Kernel.Geom;
using iText.Kernel.Pdf;
+using iText.Kernel.Utils;
using iText.Pdfa;
+using iText.Pdfocr.Exceptions;
using iText.Pdfocr.Helpers;
using iText.Pdfocr.Logs;
using iText.Test;
@@ -193,6 +195,49 @@ public virtual void TestImageRotationHandlerForTiff() {
;
}
+        /// <summary>
+        /// Creates a tagged PDF with <see cref="TestStructureDetectionOcrEngine"/> and compares it by content
+        /// with the stored reference <c>cmp_tableStructureTree.pdf</c>.
+        /// </summary>
+        [NUnit.Framework.Test]
+        public virtual void TestTableStructureTree() {
+            String outPdf = PdfHelper.GetTargetDirectory() + "tableStructureTree.pdf";
+            // The image content is irrelevant here: the test engine does not read it
+            String image = PdfHelper.GetImagesTestDirectory() + "numbers_01.jpg";
+            IOcrEngine engine = new TestStructureDetectionOcrEngine();
+            OcrPdfCreatorProperties props = new OcrPdfCreatorProperties();
+            props.SetTextColor(DeviceRgb.RED);
+            props.SetTagged(true);
+            OcrPdfCreator creator = new OcrPdfCreator(engine, props);
+            // Arguments: rowCount, columnCount, cellWidth, cellHeight, startX, startY
+            TestProcessProperties grid = new TestProcessProperties(5, 6, 50, 15, 100, 200);
+            using (PdfWriter writer = PdfHelper.GetPdfWriter(outPdf)) {
+                creator.CreatePdf(JavaCollectionsUtil.SingletonList(new FileInfo(image)), writer,
+                    new DocumentProperties(), grid).Close();
+            }
+            String cmpPdf = PdfHelper.TEST_DIRECTORY + "cmp_tableStructureTree.pdf";
+            String diffMessage = new CompareTool().CompareByContent(outPdf, cmpPdf,
+                PdfHelper.GetTargetDirectory(), "diff_");
+            NUnit.Framework.Assert.IsNull(diffMessage);
+        }
+
+        /// <summary>
+        /// Expects <c>PdfHelper.CreatePdf</c> with tagging enabled to throw <see cref="PdfOcrException"/> whose
+        /// message is CANNOT_CREATE_PDF_DOCUMENT formatted with TAGGING_IS_NOT_SUPPORTED.
+        /// </summary>
+        [NUnit.Framework.Test]
+        [LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, LogLevel = LogLevelConstants.ERROR)]
+        public virtual void TestTaggingNotSupported() {
+            String image = PdfHelper.GetImagesTestDirectory() + "numbers_01.jpg";
+            String outPdf = PdfHelper.GetTargetDirectory() + "taggingNotSupported.pdf";
+            Exception thrown = NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => {
+                PdfHelper.CreatePdf(outPdf, new FileInfo(image), new OcrPdfCreatorProperties().SetTagged(true));
+            });
+            String expected = MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT,
+                PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED);
+            NUnit.Framework.Assert.AreEqual(expected, thrown.Message);
+        }
+
internal class NotImplementedImageRotationHandler : IImageRotationHandler {
public virtual ImageData ApplyRotation(ImageData imageData) {
throw new Exception("applyRotation is not implemented");
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs
index d5f78d1..e748594 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs
index da4aa6b..a9acf93 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs
index 25fdf10..a736190 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs
index eae0faf..564aae6 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs
index 4c96e66..8fe29c6 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs
index 4d98300..a97282d 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs
index 1574f9e..c4553a3 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs
index 63b6ad4..5ba8dbd 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs
index 1d13ec0..8cb0274 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs
index c29dbe3..a200d29 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs
index 06ca089..957efe6 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs
index d1ff9f0..0dcca43 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/ExtractionStrategy.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/ExtractionStrategy.cs
index 2269e5e..be4fa7f 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/ExtractionStrategy.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/ExtractionStrategy.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs
index e95a9e6..ebba8f9 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestProcessProperties.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestProcessProperties.cs
new file mode 100644
index 0000000..9868221
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestProcessProperties.cs
@@ -0,0 +1,90 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Pdfocr;
+
+namespace iText.Pdfocr.Helpers {
+    /// <summary>
+    /// Test implementation of <see cref="IOcrProcessProperties"/> that holds the layout of a
+    /// regular grid: row and column counts, cell size and start coordinates.
+    /// </summary>
+    public class TestProcessProperties : IOcrProcessProperties {
+        private readonly float cellWidth;
+
+        private readonly float cellHeight;
+
+        private readonly float startX;
+
+        private readonly float startY;
+
+        private readonly int rowCount;
+
+        private readonly int columnCount;
+
+        /// <summary>Stores the grid layout; the values are kept as given, without validation.</summary>
+        /// <param name="rowCount">number of rows in the grid</param>
+        /// <param name="columnCount">number of columns in the grid</param>
+        /// <param name="cellWidth">width of a single cell</param>
+        /// <param name="cellHeight">height of a single cell</param>
+        /// <param name="startX">x coordinate at which the grid starts</param>
+        /// <param name="startY">y coordinate at which the grid starts</param>
+        public TestProcessProperties(int rowCount, int columnCount, float cellWidth, float cellHeight, float startX
+            , float startY) {
+            this.rowCount = rowCount;
+            this.columnCount = columnCount;
+            this.cellWidth = cellWidth;
+            this.cellHeight = cellHeight;
+            this.startX = startX;
+            this.startY = startY;
+        }
+
+        /// <summary>Gets the row count passed to the constructor.</summary>
+        public virtual int GetRowCount() {
+            return rowCount;
+        }
+
+        /// <summary>Gets the column count passed to the constructor.</summary>
+        public virtual int GetColumnCount() {
+            return columnCount;
+        }
+
+        /// <summary>Gets the cell width passed to the constructor.</summary>
+        public virtual float GetCellWidth() {
+            return cellWidth;
+        }
+
+        /// <summary>Gets the cell height passed to the constructor.</summary>
+        public virtual float GetCellHeight() {
+            return cellHeight;
+        }
+
+        /// <summary>Gets the start x coordinate passed to the constructor.</summary>
+        public virtual float GetStartX() {
+            return startX;
+        }
+
+        /// <summary>Gets the start y coordinate passed to the constructor.</summary>
+        public virtual float GetStartY() {
+            return startY;
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestStructureDetectionOcrEngine.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestStructureDetectionOcrEngine.cs
new file mode 100644
index 0000000..7970f00
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestStructureDetectionOcrEngine.cs
@@ -0,0 +1,109 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System.Collections.Generic;
+using System.IO;
+using iText.Kernel.Geom;
+using iText.Pdfocr;
+using iText.Pdfocr.Structuretree;
+
+namespace iText.Pdfocr.Helpers {
+    /// <summary>
+    /// Test <see cref="IOcrEngine"/> that performs no recognition: it fabricates a grid of text items
+    /// from <see cref="TestProcessProperties"/> and attaches logical structure tree items to them.
+    /// </summary>
+    public class TestStructureDetectionOcrEngine : IOcrEngine {
+        public TestStructureDetectionOcrEngine() {
+        }
+
+        /// <summary>Not implemented by this test engine; always returns <c>null</c>.</summary>
+        public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input) {
+            return null;
+        }
+
+        /// <summary>
+        /// Generates one text item per grid cell; the input image is not read.
+        /// </summary>
+        /// <remarks>
+        /// Items of the first row are marked as artifacts. Every later row is added to a table item as a
+        /// table row, and each of its text items gets a span nested in a paragraph nested in a table cell.
+        /// </remarks>
+        /// <param name="input">ignored</param>
+        /// <param name="ocrProcessContext">context whose process properties must be a
+        /// <see cref="TestProcessProperties"/> instance (a direct cast is used)</param>
+        /// <returns>a dictionary holding all generated items under key <c>1</c></returns>
+        public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input, OcrProcessContext ocrProcessContext
+            ) {
+            TestProcessProperties processProperties = (TestProcessProperties)ocrProcessContext.GetOcrProcessProperties
+                ();
+            IList<TextInfo> textItems = new List<TextInfo>();
+            TableTreeItem table = new TableTreeItem();
+            float cellWidth = processProperties.GetCellWidth();
+            float cellHeight = processProperties.GetCellHeight();
+            float startX = processProperties.GetStartX();
+            float startY = processProperties.GetStartY();
+            float x = startX;
+            float y = startY;
+            for (int i = 0; i < processProperties.GetRowCount(); ++i) {
+                // Row 0 gets no table row: its items are artifacts and stay outside the table
+                TableRowTreeItem row = null;
+                if (i > 0) {
+                    row = new TableRowTreeItem();
+                    table.AddRow(row);
+                }
+                for (int j = 0; j < processProperties.GetColumnCount(); ++j) {
+                    TextInfo textInfo = new TextInfo(i + " " + j, new Rectangle(x, y, cellWidth, cellHeight));
+                    // Mark the 1st row item as artifacts
+                    if (i == 0) {
+                        textInfo.SetLogicalStructureTreeItem(ArtifactItem.GetInstance());
+                    }
+                    else {
+                        TableCellTreeItem cell = new TableCellTreeItem();
+                        row.AddCell(cell);
+                        ParagraphTreeItem paragraph = new ParagraphTreeItem();
+                        cell.AddChild(paragraph);
+                        SpanTreeItem span = new SpanTreeItem();
+                        paragraph.AddChild(span);
+                        textInfo.SetLogicalStructureTreeItem(span);
+                    }
+                    textItems.Add(textInfo);
+                    x += cellWidth;
+                }
+                // Next row: back to the first column, y reduced by one cell height
+                x = startX;
+                y -= cellHeight;
+            }
+            IDictionary<int, IList<TextInfo>> result = new Dictionary<int, IList<TextInfo>>();
+            result.Put(1, textItems);
+            return result;
+        }
+
+        /// <summary>Intentionally does nothing.</summary>
+        public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile) {
+        }
+
+        /// <summary>Intentionally does nothing.</summary>
+        public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile, OcrProcessContext ocrProcessContext
+            ) {
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs
index d3e56fd..91b642e 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs
index 8b941e6..db07fa1 100644
--- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/LogicalStructureTreeItemTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/LogicalStructureTreeItemTest.cs
new file mode 100644
index 0000000..f2942e4
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/LogicalStructureTreeItemTest.cs
@@ -0,0 +1,72 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Kernel.Pdf.Tagutils;
+using iText.Test;
+
+namespace iText.Pdfocr.Structuretree {
+    /// <summary>Unit tests for <see cref="LogicalStructureTreeItem"/>.</summary>
+    [NUnit.Framework.Category("UnitTest")]
+    public class LogicalStructureTreeItemTest : ExtendedITextTest {
+        /// <summary>
+        /// Adds <c>child2</c> first to <c>child1</c> and then to <c>parent</c>; expects it to end up
+        /// under <c>parent</c> only, leaving <c>child1</c> without children.
+        /// </summary>
+        [NUnit.Framework.Test]
+        public virtual void AddChildTest() {
+            LogicalStructureTreeItem parent = new LogicalStructureTreeItem();
+            LogicalStructureTreeItem child1 = new LogicalStructureTreeItem();
+            LogicalStructureTreeItem child2 = new LogicalStructureTreeItem();
+            child1.AddChild(child2);
+            parent.AddChild(child1);
+            parent.AddChild(child2);
+            NUnit.Framework.Assert.AreEqual(2, parent.GetChildren().Count);
+            NUnit.Framework.Assert.AreEqual(0, child1.GetChildren().Count);
+            NUnit.Framework.Assert.AreEqual(parent, child1.GetParent());
+            NUnit.Framework.Assert.AreEqual(parent, child2.GetParent());
+        }
+
+        /// <summary>
+        /// Expects the first removal of a child to return true, a repeated removal to return false.
+        /// </summary>
+        [NUnit.Framework.Test]
+        public virtual void RemoveChildTest() {
+            LogicalStructureTreeItem parent = new LogicalStructureTreeItem();
+            LogicalStructureTreeItem child1 = new LogicalStructureTreeItem();
+            LogicalStructureTreeItem child2 = new LogicalStructureTreeItem();
+            child1.AddChild(child2);
+            parent.AddChild(child1);
+            parent.AddChild(child2);
+            NUnit.Framework.Assert.IsTrue(parent.RemoveChild(child1));
+            NUnit.Framework.Assert.IsFalse(parent.RemoveChild(child1));
+            NUnit.Framework.Assert.AreEqual(1, parent.GetChildren().Count);
+        }
+
+        /// <summary>Expects the role set through accessibility properties to be readable back.</summary>
+        [NUnit.Framework.Test]
+        public virtual void AccessibilityPropertiesTest() {
+            LogicalStructureTreeItem item = new LogicalStructureTreeItem().SetAccessibilityProperties(new DefaultAccessibilityProperties
+                ("Some role"));
+            NUnit.Framework.Assert.AreEqual("Some role", item.GetAccessibilityProperties().GetRole());
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/TableTreeStructureTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/TableTreeStructureTest.cs
new file mode 100644
index 0000000..3e61144
--- /dev/null
+++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/TableTreeStructureTest.cs
@@ -0,0 +1,51 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Test;
+
+namespace iText.Pdfocr.Structuretree {
+    /// <summary>Unit test for building a table out of structure tree items.</summary>
+    [NUnit.Framework.Category("UnitTest")]
+    public class TableTreeStructureTest : ExtendedITextTest {
+        /// <summary>
+        /// Builds a table of two rows with two cells each and checks the child count on every level
+        /// along the first branch: table, row, cell, paragraph.
+        /// </summary>
+        [NUnit.Framework.Test]
+        public virtual void TableTreeTest() {
+            TableTreeItem table = new TableTreeItem()
+                .AddRow(new TableRowTreeItem().AddCell(CreateCell()).AddCell(CreateCell()))
+                .AddRow(new TableRowTreeItem().AddCell(CreateCell()).AddCell(CreateCell()));
+            NUnit.Framework.Assert.AreEqual(2, table.GetChildren().Count);
+            NUnit.Framework.Assert.AreEqual(2, table.GetChildren()[0].GetChildren().Count);
+            NUnit.Framework.Assert.AreEqual(1, table.GetChildren()[0].GetChildren()[0].GetChildren().Count);
+            NUnit.Framework.Assert.AreEqual(1, table.GetChildren()[0].GetChildren()[0].GetChildren()[0].GetChildren().
+                Count);
+        }
+
+        /// <summary>Creates a table cell that contains one paragraph with one span.</summary>
+        private static TableCellTreeItem CreateCell() {
+            return (TableCellTreeItem)new TableCellTreeItem().AddChild(new ParagraphTreeItem().AddChild(
+                new SpanTreeItem()));
+        }
+    }
+}
diff --git a/itext.tests/itext.pdfocr.api.tests/resources/itext/pdfocr/cmp_tableStructureTree.pdf b/itext.tests/itext.pdfocr.api.tests/resources/itext/pdfocr/cmp_tableStructureTree.pdf
new file mode 100644
index 0000000..6cc8faa
Binary files /dev/null and b/itext.tests/itext.pdfocr.api.tests/resources/itext/pdfocr/cmp_tableStructureTree.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs b/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs
index 9b7d29f..3e21341 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs
@@ -7,7 +7,7 @@
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("Apryse Group NV")]
[assembly: AssemblyProduct("iText")]
-[assembly: AssemblyCopyright ("Copyright (c) 1998-2023 Apryse Group NV")]
+[assembly: AssemblyCopyright("Copyright (c) 1998-2024 Apryse Group NV")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
@@ -15,6 +15,6 @@
[assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")]
-[assembly: AssemblyVersion("3.0.1.0")]
-[assembly: AssemblyFileVersion("3.0.1.0")]
-[assembly: AssemblyInformationalVersion("3.0.1")]
+[assembly: AssemblyVersion("3.0.2.0")]
+[assembly: AssemblyFileVersion("3.0.2.0")]
+[assembly: AssemblyInformationalVersion("3.0.2")]
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj b/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj
index d41bb1b..831a886 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj
@@ -26,9 +26,9 @@
-
+
-
+
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs
index 8aa5a2b..c7a4493 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs
index a2a67eb..59d313b 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs
index be21535..16f0783 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs
index 5a65e76..64e9280 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs
index 57f783f..2599336 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs
index aca97e6..a26020b 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs
index 1124167..5d14732 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs
index 74f1f41..0901e61 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationExecutableTest.cs
index 9b07496..f9dce5a 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationLibTest.cs
index 093f508..fcc6176 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs
index ffddb99..eb96c00 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.cs
index 4687576..c5f7e86 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationLibTest.cs
index 9ccba2f..c970fe1 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs
index aa5d45d..d618cd9 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.cs
index 6dd553d..5f38227 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.cs
index 2f87cb5..95f9105 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs
index 6391171..e204f6f 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.cs
index cca27e0..910e64e 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationLibTest.cs
index 44c86c9..f3f8297 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs
index ff03ff1..96ac6ad 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationExecutableTest.cs
index 0743ce5..1bc1ed3 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs
index 6f928b7..3c58c5a 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -84,10 +84,10 @@ public virtual void CompareInvoiceFrontThaiImage() {
tesseractReader.SetTesseract4OcrEngineProperties(properties);
DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList
("tha", "eng"), JavaUtil.ArraysAsList(NOTO_SANS_THAI_FONT_PATH, NOTO_SANS_FONT_PATH), DeviceRgb.RED);
- bool javaTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY
- , "diff_") == null;
- bool dotNetTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY
- , "diff_") == null;
+ bool javaTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava, GetTargetDirectory(
+ ), "diff_") == null;
+ bool dotNetTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory
+ (), "diff_") == null;
NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest);
}
@@ -108,10 +108,10 @@ public virtual void CompareThaiTextImage() {
tesseractReader.SetTesseract4OcrEngineProperties(properties);
DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList
("tha"), JavaUtil.ArraysAsList(NOTO_SANS_THAI_FONT_PATH), DeviceRgb.RED);
- bool javaTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY
- , "diff_") == null;
- bool dotNetTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY
- , "diff_") == null;
+ bool javaTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava, GetTargetDirectory(
+ ), "diff_") == null;
+ bool dotNetTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory
+ (), "diff_") == null;
NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest);
}
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs
index 67d7d89..b40c7b0 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -290,8 +290,8 @@ public virtual void CompareMultiLangImage() {
tesseractReader.SetTesseract4OcrEngineProperties(properties);
DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList
("eng", "deu", "spa"), DeviceCmyk.BLACK);
- NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(resultPdfPath, expectedPdfPath, TEST_DOCUMENTS_DIRECTORY
- , "diff_"));
+ NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(resultPdfPath, expectedPdfPath, GetTargetDirectory
+ (), "diff_"));
}
finally {
NUnit.Framework.Assert.AreEqual(TextPositioning.BY_WORDS, tesseractReader.GetTesseract4OcrEngineProperties
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs
index a9d4410..b1f5638 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationExecutableTest.cs
index 732a7be..d39752f 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationLibTest.cs
index 320f340..224a22f 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs
index 72ff0bc..249bc6f 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -93,10 +93,10 @@ public virtual void CompareRotatedImage() {
// Because of difference of tesseract 5 and tesseract 4 there're some differences in text recognition.
// So the goal of this test is to make text invisible and check if image is rotated.
// Proper text recognition is compared in testHocrRotatedImage test by checking HOCR file.
- bool javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY
- , "diff_") == null;
- bool dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY
+ bool javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, GetTargetDirectory()
, "diff_") == null;
+ bool dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory
+ (), "diff_") == null;
NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest);
filename = "180_degrees_rotated";
expectedPdfPathJava = TEST_DOCUMENTS_DIRECTORY + filename + "_java.pdf";
@@ -104,10 +104,10 @@ public virtual void CompareRotatedImage() {
resultPdfPath = GetTargetDirectory() + filename + "_" + testName + ".pdf";
DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList
("eng"), JavaUtil.ArraysAsList(NOTO_SANS_FONT_PATH), null, true);
- javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY,
+ javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, GetTargetDirectory(), "diff_"
+ ) == null;
+ dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory(),
"diff_") == null;
- dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY
- , "diff_") == null;
NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest);
filename = "270_degrees_rotated";
expectedPdfPathJava = TEST_DOCUMENTS_DIRECTORY + filename + "_java.pdf";
@@ -115,10 +115,10 @@ public virtual void CompareRotatedImage() {
resultPdfPath = GetTargetDirectory() + filename + "_" + testName + ".pdf";
DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList
("eng"), JavaUtil.ArraysAsList(NOTO_SANS_FONT_PATH), null, true);
- javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY,
+ javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, GetTargetDirectory(), "diff_"
+ ) == null;
+ dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory(),
"diff_") == null;
- dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY
- , "diff_") == null;
NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest);
}
}
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs
index 9b7a83f..03efcff 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs
index 1474511..a0022c9 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs
index 933486c..48684f7 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs
index 2be9fed..3e5296d 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs
index f8c848b..a2c33a3 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperExecutableTest.cs
index 7d1f7b3..3c05e98 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs
index 927ce80..058d2e3 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs
index f9d7f2a..8aa6101 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs
index 7edbb95..56d42a2 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsExecutableTest.cs
index 453cd95..ddc71cb 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsExecutableTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsExecutableTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsLibTest.cs
index 3746c69..92b578e 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsLibTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsLibTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs
index 205741a..b42b87a 100644
--- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs
+++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_executable.pdf
index 5d706d0..0ec47a6 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_executable.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_lib.pdf
index 004e5ef..be02c79 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_lib.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_executable.pdf
index eaed8dd..0ed0207 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_executable.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_lib.pdf
index 29d2871..43fcb43 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_lib.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_02.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_02.pdf
index 9e1fba5..787908d 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_02.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_02.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_dotnet.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_dotnet.pdf
index 011d403..1f06512 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_dotnet.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_dotnet.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_java.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_java.pdf
index 616b588..eb6415a 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_java.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_java.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_executable.pdf
index 4441b9e..98a56df 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_executable.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_lib.pdf
index 7fa0333..ade75fc 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_lib.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_executable.pdf
index 35b6538..78c055d 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_executable.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_lib.pdf
index 1a726fe..f59e8e7 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_lib.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01.pdf
index 882a45d..ef945e6 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_a3u.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_a3u.pdf
index 7657a62..5faffc2 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_a3u.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_a3u.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_executable.pdf
index 0bbd75b..2351b54 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_executable.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_lib.pdf
index cafdbf5..0461509 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_lib.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_executable.pdf
index f74a7df..695f84c 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_executable.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_lib.pdf
index ea13a39..e12d776 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_lib.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_executable.pdf
index c86db65..3191b10 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_executable.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_lib.pdf
index 867a2f5..dad14bf 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_lib.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/spanish_01_a3u.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/spanish_01_a3u.pdf
index 68c24cf..2cc35a4 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/spanish_01_a3u.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/spanish_01_a3u.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_dotnet.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_dotnet.pdf
index 68f5d7e..c9c31de 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_dotnet.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_dotnet.pdf differ
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_java.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_java.pdf
index b65d60c..1b27fe1 100644
Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_java.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_java.pdf differ
diff --git a/itext/itext.pdfocr.api/PdfOcrExtensions.cs b/itext/itext.pdfocr.api/PdfOcrExtensions.cs
index b4c44f4..dad4f06 100644
--- a/itext/itext.pdfocr.api/PdfOcrExtensions.cs
+++ b/itext/itext.pdfocr.api/PdfOcrExtensions.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs b/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs
index 5c64f22..2cd884e 100644
--- a/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs
+++ b/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs
@@ -7,16 +7,16 @@
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("Apryse Group NV")]
[assembly: AssemblyProduct("iText")]
-[assembly: AssemblyCopyright ("Copyright (c) 1998-2023 Apryse Group NV")]
+[assembly: AssemblyCopyright("Copyright (c) 1998-2024 Apryse Group NV")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
[assembly: ComVisible(false)]
[assembly: Guid("0c4ceb00-9a56-4547-a925-5974a85a6048")]
-[assembly: AssemblyVersion("3.0.1.0")]
-[assembly: AssemblyFileVersion("3.0.1.0")]
-[assembly: AssemblyInformationalVersion("3.0.1")]
+[assembly: AssemblyVersion("3.0.2.0")]
+[assembly: AssemblyFileVersion("3.0.2.0")]
+[assembly: AssemblyInformationalVersion("3.0.2")]
[assembly: InternalsVisibleTo("itext.pdfocr.api.tests, PublicKey=" +
"00240000048000009400000006020000002400005253413100040000010001008b21ed5b3fc1c1" +
"1996390981fe22bbe71a39a9e11d3c2cefddd6ee92920fa871f9666ae0fa941af0280d0653df04" +
diff --git a/itext/itext.pdfocr.api/itext.pdfocr.api.csproj b/itext/itext.pdfocr.api/itext.pdfocr.api.csproj
index 7d3ba8d..6e1943e 100644
--- a/itext/itext.pdfocr.api/itext.pdfocr.api.csproj
+++ b/itext/itext.pdfocr.api/itext.pdfocr.api.csproj
@@ -30,7 +30,7 @@
-
+
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs b/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs
index b8dae29..73667ec 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IImageRotationHandler.cs b/itext/itext.pdfocr.api/itext/pdfocr/IImageRotationHandler.cs
index 94e176b..18bac1b 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/IImageRotationHandler.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/IImageRotationHandler.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs b/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs
index 15b0f2a..dc5375f 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IOcrProcessProperties.cs b/itext/itext.pdfocr.api/itext/pdfocr/IOcrProcessProperties.cs
new file mode 100644
index 0000000..8a68583
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/IOcrProcessProperties.cs
@@ -0,0 +1,30 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+namespace iText.Pdfocr {
+ /// <summary>
+ /// OCR properties passed to the OCR engine as part of
+ /// <see cref="OcrProcessContext"/>.
+ /// </summary>
+ public interface IOcrProcessProperties {
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs b/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs
index 282723c..1becbb2 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs
index 78544e5..939c5cb 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs
index 727e395..2d2dbee 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -36,6 +36,7 @@ You should have received a copy of the GNU Affero General Public License
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas;
using iText.Kernel.Pdf.Layer;
+using iText.Kernel.Pdf.Tagutils;
using iText.Layout;
using iText.Layout.Element;
using iText.Layout.Font;
@@ -44,6 +45,7 @@ You should have received a copy of the GNU Affero General Public License
using iText.Pdfocr.Exceptions;
using iText.Pdfocr.Logs;
using iText.Pdfocr.Statistics;
+using iText.Pdfocr.Structuretree;
namespace iText.Pdfocr {
///
@@ -193,13 +195,17 @@ public void SetOcrPdfCreatorProperties(OcrPdfCreatorProperties ocrPdfCreatorProp
///
/// for PDF/A-3u document
///
+ /// <param name="ocrProcessProperties">
+ /// extra OCR process properties passed to
+ /// <see cref="OcrProcessContext"/>
+ /// </param>
///
/// result PDF/A-3u
///
/// object
///
public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties
- , PdfOutputIntent pdfOutputIntent) {
+ , PdfOutputIntent pdfOutputIntent, IOcrProcessProperties ocrProcessProperties) {
LOGGER.LogInformation(MessageFormatUtil.Format(PdfOcrLogMessageConstant.START_OCR_FOR_IMAGES, inputImages.
Count));
// create event helper
@@ -207,6 +213,7 @@ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter,
OcrPdfCreatorEventHelper ocrEventHelper = new OcrPdfCreatorEventHelper(pdfSequenceId, ocrPdfCreatorProperties
.GetMetaInfo());
OcrProcessContext ocrProcessContext = new OcrProcessContext(ocrEventHelper);
+ ocrProcessContext.SetOcrProcessProperties(ocrProcessProperties);
// map contains:
// keys: image files
// values:
@@ -277,6 +284,113 @@ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter,
return CreatePdfA(inputImages, pdfWriter, new DocumentProperties(), pdfOutputIntent);
}
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ ///
+ /// ,
+ ///
+ /// and
+ /// .
+ ///
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ ///
+ /// ,
+ ///
+ /// and
+ ///
+ /// . PDF/A-3u document will be created if
+ /// provided
+ ///
+ /// is not null.
+ ///
+ /// NOTE that after executing this method you will have a product event from
+ /// the both itextcore and pdfOcr. Therefore, use this method only if you need to work
+ /// with the generated
+ ///
+ /// . If you don't need this, use the
+ ///
+ /// method. In this case, only the pdfOcr event will be dispatched.
+ ///
+ ///
+ ///
+ ///
+ /// of images to be OCRed
+ ///
+ ///
+ /// the
+ ///
+ /// object
+ /// to write final PDF document to
+ ///
+ /// document properties
+ ///
+ ///
+ ///
+ /// for PDF/A-3u document
+ ///
+ ///
+ /// result PDF/A-3u
+ ///
+ /// object
+ ///
+ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties
+ , PdfOutputIntent pdfOutputIntent) {
+ return CreatePdfA(inputImages, pdfWriter, documentProperties, pdfOutputIntent, null);
+ }
+
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ /// .
+ ///
+ ///
+ /// Performs OCR with set parameters using provided
+ ///
+ /// and
+ /// creates PDF using provided
+ /// .
+ ///
+ /// NOTE that after executing this method you will have a product event from
+ /// the both itextcore and pdfOcr. Therefore, use this method only if you need to work
+ /// with the generated
+ ///
+ /// . If you don't need this, use the
+ ///
+ /// method. In this case, only the pdfOcr event will be dispatched.
+ ///
+ ///
+ ///
+ ///
+ /// of images to be OCRed
+ ///
+ ///
+ /// the
+ ///
+ /// object
+ /// to write final PDF document to
+ ///
+ /// document properties
+ /// <param name="ocrProcessProperties">extra OCR process properties passed to OcrProcessContext</param>
+ ///
+ /// result
+ ///
+ /// object
+ ///
+ public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties
+ , IOcrProcessProperties ocrProcessProperties) {
+ return CreatePdfA(inputImages, pdfWriter, documentProperties, null, ocrProcessProperties);
+ }
+
///
/// Performs OCR with set parameters using provided
///
@@ -318,7 +432,7 @@ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter,
///
public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties
) {
- return CreatePdfA(inputImages, pdfWriter, documentProperties, null);
+ return CreatePdfA(inputImages, pdfWriter, documentProperties, null, null);
}
///
@@ -360,7 +474,7 @@ public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter, D
/// object
///
public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter) {
- return CreatePdfA(inputImages, pdfWriter, new DocumentProperties(), null);
+ return CreatePdfA(inputImages, pdfWriter, new DocumentProperties(), null, null);
}
///
@@ -510,7 +624,25 @@ private void AddToCanvas(PdfDocument pdfDocument, Rectangle imageSize, IList flatLogicalTree = new Dictionary();
+ if (ocrPdfCreatorProperties.IsTagged()) {
+ // Logical tree, a list of top items, children can be retrieved out of them
+ IList logicalTree = new List();
+ // A map of leaf LogicalStructureTreeItem's to TextInfo's attached to these leaves
+ IDictionary> leavesTextInfos = new Dictionary>();
+ bool taggedSupported = GetLogicalTree(pageText, logicalTree, leavesTextInfos);
+ if (!taggedSupported) {
+ throw new PdfOcrException(PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED);
+ }
+ pdfDocument.SetTagged();
+ // Create a map of TextInfo to tag pointers meanwhile creating the required tags.
+ // Tag pointers are later used to put all the required info into canvas (content stream)
+ BuildLogicalTreeAndFlatten(logicalTree, leavesTextInfos, new TagTreePointer(pdfDocument).SetPageForTagging
+ (pdfPage), flatLogicalTree);
+ }
+ AddTextToCanvas(imageSize, pageText, flatLogicalTree, canvas, multiplier, pdfPage);
}
catch (PdfOcrException e) {
LOGGER.LogError(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, e.Message
@@ -614,6 +746,9 @@ private void AddDataToPdfDocument(IDictionarycanvas to place the image
private void AddImageToCanvas(ImageData imageData, Rectangle imageSize, PdfCanvas pdfCanvas) {
if (imageData != null) {
+ if (ocrPdfCreatorProperties.IsTagged()) {
+ pdfCanvas.OpenTag(new CanvasArtifact());
+ }
if (ocrPdfCreatorProperties.GetPageSize() == null) {
pdfCanvas.AddImageFittedIntoRectangle(imageData, imageSize, false);
}
@@ -624,6 +759,84 @@ private void AddImageToCanvas(ImageData imageData, Rectangle imageSize, PdfCanva
.GetHeight());
pdfCanvas.AddImageFittedIntoRectangle(imageData, rect, false);
}
+ if (ocrPdfCreatorProperties.IsTagged()) {
+ pdfCanvas.CloseTag();
+ }
+ }
+ }
+
+ /// <returns>
+ ///
+ /// <see langword="true"/>
+ /// if tagging supported by the engine.
+ /// </returns>
+ [System.ObsoleteAttribute(@"In next major version we need to add boolean taggingSupported() method into IOcrEngine and throw exception in OcrPdfCreator constructor if taggingSupported() returns false but OcrPdfCreatorProperties.getTagged returns true."
+ )]
+ private static bool GetLogicalTree(IList textInfos, IList logicalStructureTreeItems
+ , IDictionary> leavesTextInfos) {
+ bool taggedSupported = false;
+ if (textInfos == null) {
+ return taggedSupported;
+ }
+ foreach (TextInfo textInfo in textInfos) {
+ LogicalStructureTreeItem structTreeItem = textInfo.GetLogicalStructureTreeItem();
+ LogicalStructureTreeItem topParent;
+ if (structTreeItem is ArtifactItem) {
+ continue;
+ }
+ else {
+ if (structTreeItem != null) {
+ topParent = GetTopParent(structTreeItem);
+ taggedSupported = true;
+ }
+ else {
+ structTreeItem = new LogicalStructureTreeItem();
+ textInfo.SetLogicalStructureTreeItem(structTreeItem);
+ topParent = structTreeItem;
+ }
+ }
+ IList textInfosPerStructItem = leavesTextInfos.Get(structTreeItem);
+ if (textInfosPerStructItem == null) {
+ textInfosPerStructItem = new List();
+ textInfosPerStructItem.Add(textInfo);
+ leavesTextInfos.Put(structTreeItem, textInfosPerStructItem);
+ }
+ else {
+ textInfosPerStructItem.Add(textInfo);
+ }
+ if (!logicalStructureTreeItems.Contains(topParent)) {
+ logicalStructureTreeItems.Add(topParent);
+ }
+ }
+ return taggedSupported;
+ }
+
+ private static LogicalStructureTreeItem GetTopParent(LogicalStructureTreeItem structInfo) {
+ if (structInfo.GetParent() != null) {
+ return GetTopParent(structInfo.GetParent());
+ }
+ else {
+ return structInfo;
+ }
+ }
+
+ private void BuildLogicalTreeAndFlatten(IList logicalStructureTreeItems, IDictionary
+ > leavesTextInfos, TagTreePointer tagPointer, IDictionary flatLogicalTree) {
+ foreach (LogicalStructureTreeItem structTreeItem in logicalStructureTreeItems) {
+ AccessibilityProperties accessibilityProperties = structTreeItem.GetAccessibilityProperties();
+ if (accessibilityProperties == null) {
+ accessibilityProperties = new DefaultAccessibilityProperties(PdfName.Span.GetValue());
+ }
+ tagPointer.AddTag(accessibilityProperties);
+ IList textItems = leavesTextInfos.Get(structTreeItem);
+ if (textItems != null) {
+ foreach (TextInfo item in textItems) {
+ flatLogicalTree.Put(item, new TagTreePointer(tagPointer));
+ }
+ }
+ BuildLogicalTreeAndFlatten(structTreeItem.GetChildren(), leavesTextInfos, tagPointer, flatLogicalTree);
+ tagPointer.MoveToParent();
}
}
@@ -633,45 +846,61 @@ private void AddImageToCanvas(ImageData imageData, Rectangle imageSize, PdfCanva
///
///
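/// <param name="pageText">text that was found on this image (or on this page)</param>
+ /// <param name="flatLogicalTree">a map of TextInfo to a tag pointer</param>
/// <param name="pdfCanvas">canvas to place the text</param>
/// <param name="multiplier">coefficient to adjust text placing on canvas</param>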
- /// page parameters
- private void AddTextToCanvas(Rectangle imageSize, IList pageText, PdfCanvas pdfCanvas, float multiplier
- , Rectangle pageMediaBox) {
- if (pageText != null && pageText.Count > 0) {
- Point imageCoordinates = PdfCreatorUtil.CalculateImageCoordinates(ocrPdfCreatorProperties.GetPageSize(), imageSize
- );
- foreach (TextInfo item in pageText) {
- String line = item.GetText();
- float bboxWidthPt = GetWidthPt(item, multiplier);
- float bboxHeightPt = GetHeightPt(item, multiplier);
- FontProvider fontProvider = GetOcrPdfCreatorProperties().GetFontProvider();
- String fontFamily = GetOcrPdfCreatorProperties().GetDefaultFontFamily();
- if (LineNotEmpty(line, bboxHeightPt, bboxWidthPt)) {
- Document document = new Document(pdfCanvas.GetDocument());
- document.SetFontProvider(fontProvider);
- // Scale the text width to fit the OCR bbox
- float fontSize = PdfCreatorUtil.CalculateFontSize(document, line, fontFamily, bboxHeightPt, bboxWidthPt);
- float lineWidth = PdfCreatorUtil.GetRealLineWidth(document, line, fontFamily, fontSize);
- float xOffset = GetXOffsetPt(item, multiplier);
- float yOffset = GetYOffsetPt(item, multiplier, imageSize);
- iText.Layout.Canvas canvas = new iText.Layout.Canvas(pdfCanvas, pageMediaBox);
- canvas.SetFontProvider(fontProvider);
- Text text = new Text(line).SetHorizontalScaling(bboxWidthPt / lineWidth);
- Paragraph paragraph = new Paragraph(text).SetMargin(0).SetMultipliedLeading(1.2f);
- paragraph.SetFontFamily(fontFamily).SetFontSize(fontSize);
- paragraph.SetWidth(bboxWidthPt * 1.5f);
- if (ocrPdfCreatorProperties.GetTextColor() != null) {
- paragraph.SetFontColor(ocrPdfCreatorProperties.GetTextColor());
- }
- else {
- paragraph.SetTextRenderingMode(PdfCanvasConstants.TextRenderingMode.INVISIBLE);
- }
- canvas.ShowTextAligned(paragraph, xOffset + (float)imageCoordinates.x, yOffset + (float)imageCoordinates.y
- , TextAlignment.LEFT);
- canvas.Close();
+ /// <param name="page">current page</param>
+ private void AddTextToCanvas(Rectangle imageSize, IList pageText, IDictionary flatLogicalTree, PdfCanvas pdfCanvas, float multiplier, PdfPage page) {
+ if (pageText == null || pageText.Count == 0) {
+ return;
+ }
+ Rectangle pageMediaBox = page.GetMediaBox();
+ Point imageCoordinates = PdfCreatorUtil.CalculateImageCoordinates(ocrPdfCreatorProperties.GetPageSize(), imageSize
+ );
+ foreach (TextInfo item in pageText) {
+ float bboxWidthPt = GetWidthPt(item, multiplier);
+ float bboxHeightPt = GetHeightPt(item, multiplier);
+ FontProvider fontProvider = GetOcrPdfCreatorProperties().GetFontProvider();
+ String fontFamily = GetOcrPdfCreatorProperties().GetDefaultFontFamily();
+ String line = item.GetText();
+ if (!LineNotEmpty(line, bboxHeightPt, bboxWidthPt)) {
+ continue;
+ }
+ Document document = new Document(pdfCanvas.GetDocument());
+ document.SetFontProvider(fontProvider);
+ // Scale the text width to fit the OCR bbox
+ float fontSize = PdfCreatorUtil.CalculateFontSize(document, line, fontFamily, bboxHeightPt, bboxWidthPt);
+ float lineWidth = PdfCreatorUtil.GetRealLineWidth(document, line, fontFamily, fontSize);
+ float xOffset = GetXOffsetPt(item, multiplier);
+ float yOffset = GetYOffsetPt(item, multiplier, imageSize);
+ TagTreePointer tagPointer = flatLogicalTree.Get(item);
+ if (tagPointer != null) {
+ pdfCanvas.OpenTag(tagPointer.GetTagReference());
+ }
+ else {
+ if (ocrPdfCreatorProperties.IsTagged()) {
+ pdfCanvas.OpenTag(new CanvasArtifact());
}
}
+ iText.Layout.Canvas canvas = new iText.Layout.Canvas(pdfCanvas, pageMediaBox);
+ canvas.SetFontProvider(fontProvider);
+ Text text = new Text(line).SetHorizontalScaling(bboxWidthPt / lineWidth);
+ Paragraph paragraph = new Paragraph(text).SetMargin(0);
+ paragraph.SetFontFamily(fontFamily).SetFontSize(fontSize);
+ paragraph.SetWidth(bboxWidthPt * 1.5f);
+ if (ocrPdfCreatorProperties.GetTextColor() != null) {
+ paragraph.SetFontColor(ocrPdfCreatorProperties.GetTextColor());
+ }
+ else {
+ paragraph.SetTextRenderingMode(PdfCanvasConstants.TextRenderingMode.INVISIBLE);
+ }
+ canvas.ShowTextAligned(paragraph, xOffset + (float)imageCoordinates.x, yOffset + (float)imageCoordinates.y
+ , TextAlignment.LEFT);
+ if (ocrPdfCreatorProperties.IsTagged()) {
+ pdfCanvas.CloseTag();
+ }
+ canvas.Close();
}
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs
index dd31095..f5dec9b 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs
index 381a0c1..75482a5 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -116,6 +116,9 @@ public class OcrPdfCreatorProperties {
private IMetaInfo metaInfo;
+ /// <summary>Indicates whether the created pdf is tagged or not.</summary>
+ private bool tagged = false;
+
///
/// Creates a new
///
@@ -469,6 +472,36 @@ public virtual iText.Pdfocr.OcrPdfCreatorProperties SetImageRotationHandler(IIma
return this;
}
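+ /// <summary>Defines whether pdf document should be tagged or not.</summary>
+ /// <param name="tagged">
+ ///
+ /// <see langword="true"/>
+ /// if the result pdf is expected to be tagged,
+ /// <see langword="false"/>
+ /// otherwise.
+ /// </param>
+ /// <returns>
+ /// this
+ /// <see cref="OcrPdfCreatorProperties"/>
+ /// instance.
+ /// </returns>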
+ public virtual iText.Pdfocr.OcrPdfCreatorProperties SetTagged(bool tagged) {
+ this.tagged = tagged;
+ return this;
+ }
+
+ /// <summary>Retrieve information on whether pdf document should be tagged or not.</summary>
+ /// <returns>
+ ///
+ /// <see langword="true"/>
+ /// if the result pdf is expected to be tagged,
+ /// <see langword="false"/>
+ /// otherwise.
+ /// </returns>
+ public virtual bool IsTagged() {
+ return tagged;
+ }
+
///
/// Set meta info for this
/// .
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs
index 336d23d..052fe89 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -25,6 +25,8 @@ namespace iText.Pdfocr {
public class OcrProcessContext {
private AbstractPdfOcrEventHelper ocrEventHelper;
+ private IOcrProcessProperties ocrProcessProperties;
+
/// Creates an instance of ocr process context
/// helper class for working with events
public OcrProcessContext(AbstractPdfOcrEventHelper eventHelper) {
@@ -45,5 +47,17 @@ public virtual AbstractPdfOcrEventHelper GetOcrEventHelper() {
public virtual void SetOcrEventHelper(AbstractPdfOcrEventHelper eventHelper) {
this.ocrEventHelper = eventHelper;
}
+
+ /// <summary>Set extra OCR process properties.</summary>
+ /// <param name="ocrProcessProperties">extra OCR process properties.</param>
+ internal virtual void SetOcrProcessProperties(IOcrProcessProperties ocrProcessProperties) {
+ this.ocrProcessProperties = ocrProcessProperties;
+ }
+
+ /// <summary>Get extra OCR process properties.</summary>
+ /// <returns>extra OCR process properties.</returns>
+ public virtual IOcrProcessProperties GetOcrProcessProperties() {
+ return ocrProcessProperties;
+ }
}
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs b/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs
index a25101a..00f7448 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -69,6 +69,7 @@ internal static float CalculateFontSize(Document document, String line, String f
float maxFontSize = bbox.GetHeight();
try {
Paragraph paragraph = new Paragraph(line);
+ paragraph.SetMargin(0);
paragraph.SetWidth(bbox.GetWidth());
paragraph.SetFontFamily(fontFamily);
while (Math.Abs(fontSize - maxFontSize) > 1e-1) {
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs
index c9f3fa1..0073ac2 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs
index 8ba277d..6bdfa92 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/ScaleMode.cs b/itext/itext.pdfocr.api/itext/pdfocr/ScaleMode.cs
index 5e93ad7..9d5a7cf 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/ScaleMode.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/ScaleMode.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs b/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs
index f74bc06..4f94fa6 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -22,6 +22,7 @@ You should have received a copy of the GNU Affero General Public License
*/
using System;
using iText.Kernel.Geom;
+using iText.Pdfocr.Structuretree;
namespace iText.Pdfocr {
///
@@ -38,6 +39,13 @@ public class TextInfo {
///
private Rectangle bboxRect;
+ /// <summary>
+ /// If LogicalStructureTreeItem is set, then
+ /// <see cref="TextInfo"/>
+ /// s are expected to be in logical order.
+ /// </summary>
+ private LogicalStructureTreeItem logicalStructureTreeItem;
+
///
/// Creates a new
///
@@ -104,5 +112,27 @@ public virtual Rectangle GetBboxRect() {
public virtual void SetBboxRect(Rectangle bbox) {
this.bboxRect = new Rectangle(bbox);
}
+
+ /// <summary>Retrieves structure tree item for the text item.</summary>
+ /// <returns>structure tree item.</returns>
+ public virtual LogicalStructureTreeItem GetLogicalStructureTreeItem() {
+ return logicalStructureTreeItem;
+ }
+
+ /// <summary>Sets logical structure tree parent item for the text info.</summary>
+ /// <remarks>
+ /// Sets logical structure tree parent item for the text info. It allows to organize text chunks
+ /// into logical hierarchy, e.g. specify document paragraphs, tables, etc.
+ /// <para />
+ /// If LogicalStructureTreeItem is set, then the list of
+ /// <see cref="TextInfo"/>
+ /// s in
+ /// <see cref="IOcrEngine.DoImageOcr(System.IO.FileInfo)"/>
+ /// return value is expected to be in logical order.
+ /// </remarks>
+ /// <param name="logicalStructureTreeItem">structure tree item.</param>
+ public virtual void SetLogicalStructureTreeItem(LogicalStructureTreeItem logicalStructureTreeItem) {
+ this.logicalStructureTreeItem = logicalStructureTreeItem;
+ }
}
}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs
index 0e7712f..b43b8d9 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs
index 1f585f3..3ac996d 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -35,6 +35,8 @@ public class PdfOcrExceptionMessageConstant {
public const String STATISTICS_EVENT_TYPE_IS_NOT_DETECTED = "Statistics event type is not detected.";
+ public const String TAGGING_IS_NOT_SUPPORTED = "Tagging is not supported by the OCR engine.";
+
private PdfOcrExceptionMessageConstant() {
}
//Private constructor will prevent the instantiation of this class directly
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs
index d5544b9..0ec8afe 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs b/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs
index 46a2193..ed409dc 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs
index cd1f617..cd519e1 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs
index 19d45bd..3b2e0dc 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs
index 6c2d176..bc5f621 100644
--- a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs
+++ b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ArtifactItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ArtifactItem.cs
new file mode 100644
index 0000000..e0f1b7c
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ArtifactItem.cs
@@ -0,0 +1,49 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+namespace iText.Pdfocr.Structuretree {
+ /// <summary>This class represents artifact structure tree item.</summary>
+ /// <remarks>
+ /// This class represents artifact structure tree item. Attaching such item to the text info means that
+ /// the text will be marked as artifact.
+ /// </remarks>
+ public sealed class ArtifactItem : LogicalStructureTreeItem {
+ private static readonly iText.Pdfocr.Structuretree.ArtifactItem ARTIFACT_INSTANCE = new iText.Pdfocr.Structuretree.ArtifactItem
+ ();
+
+ private ArtifactItem()
+ : base() {
+ }
+
+ /// <summary>
+ /// Retrieve an instance of
+ /// <see cref="ArtifactItem"/>.
+ /// </summary>
+ /// <returns>
+ /// an instance of
+ /// <see cref="ArtifactItem"/>.
+ /// </returns>
+ public static iText.Pdfocr.Structuretree.ArtifactItem GetInstance() {
+ return ARTIFACT_INSTANCE;
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/LogicalStructureTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/LogicalStructureTreeItem.cs
new file mode 100644
index 0000000..cfd0156
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/LogicalStructureTreeItem.cs
@@ -0,0 +1,125 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using System.Collections.Generic;
+using iText.Kernel.Pdf.Tagutils;
+
+namespace iText.Pdfocr.Structuretree {
+ /// <summary>This class represents structure tree item of the text item put into the pdf document.</summary>
+ /// <remarks>
+ /// This class represents structure tree item of the text item put into the pdf document.
+ /// See
+ /// <see cref="iText.Pdfocr.TextInfo"/>.
+ /// </remarks>
+ public class LogicalStructureTreeItem {
+ private AccessibilityProperties accessibilityProperties;
+
+ private IList<iText.Pdfocr.Structuretree.LogicalStructureTreeItem> children = new List<iText.Pdfocr.Structuretree.LogicalStructureTreeItem>();
+
+ private iText.Pdfocr.Structuretree.LogicalStructureTreeItem parent;
+
+ /// <summary>
+ /// Instantiate a new
+ /// <see cref="LogicalStructureTreeItem"/>
+ /// instance.
+ /// </summary>
+ public LogicalStructureTreeItem()
+ : this(null) {
+ }
+
+ /// <summary>
+ /// Instantiate a new
+ /// <see cref="LogicalStructureTreeItem"/>
+ /// instance.
+ /// </summary>
+ /// <param name="accessibilityProperties">properties to define and describe pdf structure elements.</param>
+ public LogicalStructureTreeItem(AccessibilityProperties accessibilityProperties) {
+ this.accessibilityProperties = accessibilityProperties;
+ }
+
+ /// <summary>Retrieve structure tree element's properties.</summary>
+ /// <returns>structure tree element's properties.</returns>
+ public virtual AccessibilityProperties GetAccessibilityProperties() {
+ return accessibilityProperties;
+ }
+
+ /// <summary>Set structure tree element's properties.</summary>
+ /// <param name="accessibilityProperties">structure tree element's properties.</param>
+ /// <returns>
+ /// this
+ /// <see cref="LogicalStructureTreeItem"/>
+ /// instance.
+ /// </returns>
+ public virtual iText.Pdfocr.Structuretree.LogicalStructureTreeItem SetAccessibilityProperties(AccessibilityProperties
+ accessibilityProperties) {
+ this.accessibilityProperties = accessibilityProperties;
+ return this;
+ }
+
+ /// <summary>Retrieve parent structure tree item.</summary>
+ /// <returns>parent structure tree item.</returns>
+ public virtual iText.Pdfocr.Structuretree.LogicalStructureTreeItem GetParent() {
+ return parent;
+ }
+
+ /// <summary>Add child structure tree item, detaching it from its previous parent if it has one.</summary>
+ /// <param name="child">child structure tree item.</param>
+ /// <returns>
+ /// this
+ /// <see cref="LogicalStructureTreeItem"/>
+ /// instance.
+ /// </returns>
+ public virtual iText.Pdfocr.Structuretree.LogicalStructureTreeItem AddChild(iText.Pdfocr.Structuretree.LogicalStructureTreeItem
+ child) {
+ if (child.GetParent() != null) { // touch the child first: a null child fails before anything is mutated
+ child.GetParent().RemoveChild(child);
+ }
+ children.Add(child); // append only after detaching, so a failure above leaves this list untouched
+ child.parent = this;
+ return this;
+ }
+
+ /// <summary>Remove child structure tree item.</summary>
+ /// <param name="child">child structure tree item.</param>
+ /// <returns>
+ ///
+ /// <see langword="true"/>
+ /// if the child was removed,
+ /// <see langword="false"/>
+ /// otherwise.
+ /// </returns>
+ public virtual bool RemoveChild(iText.Pdfocr.Structuretree.LogicalStructureTreeItem child) {
+ if (children.Remove(child)) {
+ child.parent = null; // only clear the back-reference when the item really was one of our children
+ return true;
+ }
+ return false;
+ }
+
+ /// <summary>Retrieve all child structure tree items.</summary>
+ /// <returns>all child structure tree items.</returns>
+ public virtual IList<iText.Pdfocr.Structuretree.LogicalStructureTreeItem> GetChildren() {
+ return children;
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ParagraphTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ParagraphTreeItem.cs
new file mode 100644
index 0000000..10e1b1d
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ParagraphTreeItem.cs
@@ -0,0 +1,38 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Kernel.Pdf.Tagging;
+using iText.Kernel.Pdf.Tagutils;
+
+namespace iText.Pdfocr.Structuretree {
+ /// <summary>A convenience class to associate certain text items with the paragraph structure item.</summary>
+ public class ParagraphTreeItem : LogicalStructureTreeItem {
+ /// <summary>
+ /// Instantiate a new
+ /// <see cref="ParagraphTreeItem"/>
+ /// instance.
+ /// </summary>
+ public ParagraphTreeItem()
+ : base(new DefaultAccessibilityProperties(StandardRoles.P)) {
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/SpanTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/SpanTreeItem.cs
new file mode 100644
index 0000000..6473696
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/SpanTreeItem.cs
@@ -0,0 +1,38 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Kernel.Pdf.Tagging;
+using iText.Kernel.Pdf.Tagutils;
+
+namespace iText.Pdfocr.Structuretree {
+ /// <summary>A convenience class to associate certain text items with the span structure item.</summary>
+ public class SpanTreeItem : LogicalStructureTreeItem {
+ /// <summary>
+ /// Instantiate a new
+ /// <see cref="SpanTreeItem"/>
+ /// instance.
+ /// </summary>
+ public SpanTreeItem()
+ : base(new DefaultAccessibilityProperties(StandardRoles.SPAN)) {
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableCellTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableCellTreeItem.cs
new file mode 100644
index 0000000..ef022a1
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableCellTreeItem.cs
@@ -0,0 +1,38 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Kernel.Pdf.Tagging;
+using iText.Kernel.Pdf.Tagutils;
+
+namespace iText.Pdfocr.Structuretree {
+ /// <summary>A convenience class to associate certain text items with the table cell structure item.</summary>
+ public class TableCellTreeItem : LogicalStructureTreeItem {
+ /// <summary>
+ /// Instantiate a new
+ /// <see cref="TableCellTreeItem"/>
+ /// instance.
+ /// </summary>
+ public TableCellTreeItem()
+ : base(new DefaultAccessibilityProperties(StandardRoles.TD)) {
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableRowTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableRowTreeItem.cs
new file mode 100644
index 0000000..8580a83
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableRowTreeItem.cs
@@ -0,0 +1,50 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Kernel.Pdf.Tagging;
+using iText.Kernel.Pdf.Tagutils;
+
+namespace iText.Pdfocr.Structuretree {
+ /// <summary>A convenience class to associate certain text items with the table row structure item.</summary>
+ public class TableRowTreeItem : LogicalStructureTreeItem {
+ /// <summary>
+ /// Instantiate a new
+ /// <see cref="TableRowTreeItem"/>
+ /// instance.
+ /// </summary>
+ public TableRowTreeItem()
+ : base(new DefaultAccessibilityProperties(StandardRoles.TR)) {
+ }
+
+ /// <summary>Add a new table cell structure tree item to the table row.</summary>
+ /// <param name="cellItem">table cell structure tree item to be added.</param>
+ /// <returns>
+ /// this
+ /// <see cref="TableRowTreeItem"/>
+ /// instance.
+ /// </returns>
+ public virtual iText.Pdfocr.Structuretree.TableRowTreeItem AddCell(TableCellTreeItem cellItem) {
+ AddChild(cellItem);
+ return this;
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableTreeItem.cs
new file mode 100644
index 0000000..1887871
--- /dev/null
+++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableTreeItem.cs
@@ -0,0 +1,50 @@
+/*
+This file is part of the iText (R) project.
+Copyright (c) 1998-2024 Apryse Group NV
+Authors: Apryse Software.
+
+This program is offered under a commercial and under the AGPL license.
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
+
+AGPL licensing:
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+using iText.Kernel.Pdf.Tagging;
+using iText.Kernel.Pdf.Tagutils;
+
+namespace iText.Pdfocr.Structuretree {
+ /// <summary>A convenience class to associate certain text items with the table structure item.</summary>
+ public class TableTreeItem : LogicalStructureTreeItem {
+ /// <summary>
+ /// Instantiate a new
+ /// <see cref="TableTreeItem"/>
+ /// instance.
+ /// </summary>
+ public TableTreeItem()
+ : base(new DefaultAccessibilityProperties(StandardRoles.TABLE)) {
+ }
+
+ /// <summary>Add a new row structure tree item to the table.</summary>
+ /// <param name="rowItem">row structure tree item to be added.</param>
+ /// <returns>
+ /// this
+ /// <see cref="TableTreeItem"/>
+ /// instance.
+ /// </returns>
+ public virtual iText.Pdfocr.Structuretree.TableTreeItem AddRow(TableRowTreeItem rowItem) {
+ AddChild(rowItem);
+ return this;
+ }
+ }
+}
diff --git a/itext/itext.pdfocr.api/pdfocr-api.nuspec b/itext/itext.pdfocr.api/pdfocr-api.nuspec
index 9052bef..0d06e07 100644
--- a/itext/itext.pdfocr.api/pdfocr-api.nuspec
+++ b/itext/itext.pdfocr.api/pdfocr-api.nuspec
@@ -2,7 +2,7 @@
itext.pdfocr.api
- 3.0.1
+ 3.0.2
iText pdfOcr
Apryse Software
Apryse Software
@@ -12,13 +12,13 @@
true
pdfOCR is an iText add-on to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving
pdfOCR is an iText add-on for C# (.NET) to recognize and extract text in scanned documents and images
- Copyright (c) 1998-2023 Apryse Group NV
+ Copyright (c) 1998-2024 Apryse Group NV
en-US
OCR PDF ligatures text glyphs iText Optical Character Recognition PDF/A ISO-compliant Tesseract open-source opensource English Mandarin Chinese Hindi Spanish French Arabic Bengali Russian Portuguese Indonesian scan image extractable data searchable diacritic sdk c# .net
-
+
diff --git a/itext/itext.pdfocr.tesseract4/PdfOcrTesseract4Extensions.cs b/itext/itext.pdfocr.tesseract4/PdfOcrTesseract4Extensions.cs
index 5c52b7c..a7d6f31 100644
--- a/itext/itext.pdfocr.tesseract4/PdfOcrTesseract4Extensions.cs
+++ b/itext/itext.pdfocr.tesseract4/PdfOcrTesseract4Extensions.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs b/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs
index 3cd1be2..497e6c9 100644
--- a/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs
+++ b/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs
@@ -7,16 +7,16 @@
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("Apryse Group NV")]
[assembly: AssemblyProduct("iText")]
-[assembly: AssemblyCopyright ("Copyright (c) 1998-2023 Apryse Group NV")]
+[assembly: AssemblyCopyright("Copyright (c) 1998-2024 Apryse Group NV")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
[assembly: ComVisible(false)]
[assembly: Guid("0c4ceb00-9a56-4547-a925-5974a85a6048")]
-[assembly: AssemblyVersion("3.0.1.0")]
-[assembly: AssemblyFileVersion("3.0.1.0")]
-[assembly: AssemblyInformationalVersion("3.0.1")]
+[assembly: AssemblyVersion("3.0.2.0")]
+[assembly: AssemblyFileVersion("3.0.2.0")]
+[assembly: AssemblyInformationalVersion("3.0.2")]
[assembly: InternalsVisibleTo("itext.pdfocr.tesseract4.tests, PublicKey=" +
"00240000048000009400000006020000002400005253413100040000010001008b21ed5b3fc1c1" +
"1996390981fe22bbe71a39a9e11d3c2cefddd6ee92920fa871f9666ae0fa941af0280d0653df04" +
@@ -24,4 +24,4 @@
"009746bbdafcb75bcdbcecb7caf1f0f4b6e7d013906ba60b66eb1c8298e4efb052caf6cece4bf1" +
"816902cc")]
-[assembly: Versions.Attributes.KernelVersion("8.0.2.0")]
+[assembly: Versions.Attributes.KernelVersion("8.0.3.0")]
diff --git a/itext/itext.pdfocr.tesseract4/Properties/KernelVersionAttribute.cs b/itext/itext.pdfocr.tesseract4/Properties/KernelVersionAttribute.cs
index 58d560d..96767ba 100644
--- a/itext/itext.pdfocr.tesseract4/Properties/KernelVersionAttribute.cs
+++ b/itext/itext.pdfocr.tesseract4/Properties/KernelVersionAttribute.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs
index cc05a20..ad6ad60 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingOptions.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingOptions.cs
index ed5f177..4d99864 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingOptions.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingOptions.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs
index 8eddc44..1ba18d5 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/LeptonicaImageRotationHandler.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/LeptonicaImageRotationHandler.cs
index 90e8a86..c94ab5c 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/LeptonicaImageRotationHandler.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/LeptonicaImageRotationHandler.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/OutputFormat.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/OutputFormat.cs
index 3cdcb90..b42e5a8 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/OutputFormat.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/OutputFormat.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs
index 09b2b14..ac9a871 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs
index c34b05f..59d052f 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs
index 8251f32..b15802a 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs
index b59df4f..0f66c02 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs
index b500968..2c69b19 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs
index d874608..b74acff 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs
index 4870bfa..5002325 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs
index 8bf5ae3..4969cdf 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TextPositioning.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TextPositioning.cs
index 44f5afe..15a12a3 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TextPositioning.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TextPositioning.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs
index f6fc6a0..68c0304 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
@@ -34,11 +34,11 @@ public class PdfOcrTesseract4ProductData {
private const String PDF_OCR_TESSERACT4_PUBLIC_PRODUCT_NAME = "pdfOCR-Tesseract4";
- private const String PDF_OCR_VERSION = "3.0.1";
+ private const String PDF_OCR_VERSION = "3.0.2";
private const int PDF_OCR_COPYRIGHT_SINCE = 2000;
- private const int PDF_OCR_COPYRIGHT_TO = 2023;
+ private const int PDF_OCR_COPYRIGHT_TO = 2024;
private static readonly ProductData PDF_OCR_PRODUCT_DATA = new ProductData(PDF_OCR_TESSERACT4_PUBLIC_PRODUCT_NAME
, PDF_OCR_TESSERACT4_PRODUCT_NAME, PDF_OCR_VERSION, PDF_OCR_COPYRIGHT_SINCE, PDF_OCR_COPYRIGHT_TO);
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs
index 6e1826a..607cdfe 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs
index ff66562..2c8b0dc 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs
index ccfc0e4..8f05202 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs
index 760a20b..2825801 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs
index 04bc1d3..61e3699 100644
--- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs
+++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
-Copyright (c) 1998-2023 Apryse Group NV
+Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
diff --git a/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec b/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec
index c8e532e..f9e829b 100644
--- a/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec
+++ b/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec
@@ -2,7 +2,7 @@
itext.pdfocr.tesseract4
- 3.0.1
+ 3.0.2
iText pdfOcr
Apryse Software
Apryse Software
@@ -12,13 +12,13 @@
true
pdfOCR is an iText add-on to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving
pdfOCR is an iText add-on for C# (.NET) to recognize and extract text in scanned documents and images
- Copyright (c) 1998-2023 Apryse Group NV
+ Copyright (c) 1998-2024 Apryse Group NV
en-US
OCR PDF ligatures text glyphs iText Optical Character Recognition PDF/A ISO-compliant Tesseract open-source opensource English Mandarin Chinese Hindi Spanish French Arabic Bengali Russian Portuguese Indonesian scan image extractable data searchable diacritic sdk c# .net
-
+
diff --git a/port-hash b/port-hash
index 020660f..9e2b362 100644
--- a/port-hash
+++ b/port-hash
@@ -1 +1 @@
-df7d64395362a4d8c3223f21563291963638d049
+151cd11a394f65e278ed1cafd7d8a24a461940ad