diff --git a/doxyfile b/doxyfile index 51d9aa5..06dcdbe 100644 --- a/doxyfile +++ b/doxyfile @@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8 # title of most generated pages and in a few other places. # The default value is: My Project. -PROJECT_NAME = "pdfOCR 3.0.1 API" +PROJECT_NAME = "pdfOCR 3.0.2 API" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version diff --git a/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs b/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs index 591395c..9628aa2 100644 --- a/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs +++ b/itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs @@ -7,7 +7,7 @@ [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("Apryse Group NV")] [assembly: AssemblyProduct("iText")] -[assembly: AssemblyCopyright ("Copyright (c) 1998-2023 Apryse Group NV")] +[assembly: AssemblyCopyright("Copyright (c) 1998-2024 Apryse Group NV")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] @@ -15,6 +15,6 @@ [assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")] -[assembly: AssemblyVersion("3.0.1.0")] -[assembly: AssemblyFileVersion("3.0.1.0")] -[assembly: AssemblyInformationalVersion("3.0.1")] +[assembly: AssemblyVersion("3.0.2.0")] +[assembly: AssemblyFileVersion("3.0.2.0")] +[assembly: AssemblyInformationalVersion("3.0.2")] diff --git a/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj b/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj index 0b5e41e..d4f8850 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj +++ b/itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj @@ -25,9 +25,9 @@ - + - + diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs index e0bb646..f66ea7d 100644 --- 
a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. @@ -30,7 +30,9 @@ You should have received a copy of the GNU Affero General Public License using iText.Kernel.Font; using iText.Kernel.Geom; using iText.Kernel.Pdf; +using iText.Kernel.Utils; using iText.Pdfa; +using iText.Pdfocr.Exceptions; using iText.Pdfocr.Helpers; using iText.Pdfocr.Logs; using iText.Test; @@ -193,6 +195,36 @@ public virtual void TestImageRotationHandlerForTiff() { ; } + [NUnit.Framework.Test] + public virtual void TestTableStructureTree() { + String pdfPath = PdfHelper.GetTargetDirectory() + "tableStructureTree.pdf"; + // Image doesn't really matter here + String input = PdfHelper.GetImagesTestDirectory() + "numbers_01.jpg"; + IOcrEngine ocrEngine = new TestStructureDetectionOcrEngine(); + OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties(); + creatorProperties.SetTextColor(DeviceRgb.RED); + creatorProperties.SetTagged(true); + OcrPdfCreator pdfCreator = new OcrPdfCreator(ocrEngine, creatorProperties); + TestProcessProperties processProperties = new TestProcessProperties(5, 6, 50, 15, 100, 200); + using (PdfWriter pdfWriter = PdfHelper.GetPdfWriter(pdfPath)) { + pdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList(new FileInfo(input)), pdfWriter, new DocumentProperties + (), processProperties).Close(); + } + NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(pdfPath, PdfHelper.TEST_DIRECTORY + "cmp_tableStructureTree.pdf" + , PdfHelper.GetTargetDirectory(), "diff_")); + } + + [NUnit.Framework.Test] + [LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, LogLevel = LogLevelConstants.ERROR)] + public virtual void 
TestTaggingNotSupported() { + String input = PdfHelper.GetImagesTestDirectory() + "numbers_01.jpg"; + String pdfPath = PdfHelper.GetTargetDirectory() + "taggingNotSupported.pdf"; + Exception e = NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => PdfHelper.CreatePdf(pdfPath, new + FileInfo(input), new OcrPdfCreatorProperties().SetTagged(true))); + NUnit.Framework.Assert.AreEqual(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT + , PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED), e.Message); + } + internal class NotImplementedImageRotationHandler : IImageRotationHandler { public virtual ImageData ApplyRotation(ImageData imageData) { throw new Exception("applyRotation is not implemented"); diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs index d5f78d1..e748594 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs index da4aa6b..a9acf93 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs index 25fdf10..a736190 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs index eae0faf..564aae6 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs index 4c96e66..8fe29c6 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs index 4d98300..a97282d 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs index 1574f9e..c4553a3 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs index 63b6ad4..5ba8dbd 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs index 1d13ec0..8cb0274 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs index c29dbe3..a200d29 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs index 06ca089..957efe6 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs index d1ff9f0..0dcca43 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/ExtractionStrategy.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/ExtractionStrategy.cs index 2269e5e..be4fa7f 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/ExtractionStrategy.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/ExtractionStrategy.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs index e95a9e6..ebba8f9 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestProcessProperties.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestProcessProperties.cs new file mode 100644 index 0000000..9868221 --- /dev/null +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestProcessProperties.cs @@ -0,0 +1,73 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. 
+*/ +using iText.Pdfocr; + +namespace iText.Pdfocr.Helpers { + public class TestProcessProperties : IOcrProcessProperties { + private float cellWidth; + + private float cellHeight; + + private float startX; + + private float startY; + + private int rowCount; + + private int columnCount; + + public TestProcessProperties(int rowCount, int columnCount, float cellWidth, float cellHeight, float startX + , float startY) { + this.rowCount = rowCount; + this.columnCount = columnCount; + this.cellWidth = cellWidth; + this.cellHeight = cellHeight; + this.startX = startX; + this.startY = startY; + } + + public virtual int GetRowCount() { + return rowCount; + } + + public virtual int GetColumnCount() { + return columnCount; + } + + public virtual float GetCellWidth() { + return cellWidth; + } + + public virtual float GetCellHeight() { + return cellHeight; + } + + public virtual float GetStartX() { + return startX; + } + + public virtual float GetStartY() { + return startY; + } + } +} diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestStructureDetectionOcrEngine.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestStructureDetectionOcrEngine.cs new file mode 100644 index 0000000..7970f00 --- /dev/null +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestStructureDetectionOcrEngine.cs @@ -0,0 +1,89 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. +*/ +using System.Collections.Generic; +using System.IO; +using iText.Kernel.Geom; +using iText.Pdfocr; +using iText.Pdfocr.Structuretree; + +namespace iText.Pdfocr.Helpers { + public class TestStructureDetectionOcrEngine : IOcrEngine { + public TestStructureDetectionOcrEngine() { + } + + public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input) { + return null; + } + + public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input, OcrProcessContext ocrProcessContext + ) { + TestProcessProperties processProperties = (TestProcessProperties)ocrProcessContext.GetOcrProcessProperties + (); + IList<TextInfo> textItems = new List<TextInfo>(); + TableTreeItem table = new TableTreeItem(); + float cellWidth = processProperties.GetCellWidth(); + float cellHeight = processProperties.GetCellHeight(); + float startX = processProperties.GetStartX(); + float startY = processProperties.GetStartY(); + float x = startX; + float y = startY; + for (int i = 0; i < processProperties.GetRowCount(); ++i) { + TableRowTreeItem row = null; + if (i > 0) { + row = new TableRowTreeItem(); + table.AddRow(row); + } + for (int j = 0; j < processProperties.GetColumnCount(); ++j) { + TextInfo textInfo = new TextInfo(i + " " + j, new Rectangle(x, y, cellWidth, cellHeight)); + // Mark the 1st row item as artifacts + if (i == 0) { + textInfo.SetLogicalStructureTreeItem(ArtifactItem.GetInstance()); + } + else { + TableCellTreeItem cell = new TableCellTreeItem(); + row.AddCell(cell); + ParagraphTreeItem paragraph = new ParagraphTreeItem(); + cell.AddChild(paragraph); + SpanTreeItem span = new SpanTreeItem(); + paragraph.AddChild(span); + 
textInfo.SetLogicalStructureTreeItem(span); + } + textItems.Add(textInfo); + x += cellWidth; + } + x = startX; + y -= cellHeight; + } + IDictionary<int, IList<TextInfo>> result = new Dictionary<int, IList<TextInfo>>(); + result.Put(1, textItems); + return result; + } + + public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile) { + } + + public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile, OcrProcessContext ocrProcessContext + ) { + } + } +} diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs index d3e56fd..91b642e 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs index 8b941e6..db07fa1 100644 --- a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/LogicalStructureTreeItemTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/LogicalStructureTreeItemTest.cs new file mode 100644 index 0000000..f2942e4 --- /dev/null +++ b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/LogicalStructureTreeItemTest.cs @@ -0,0 +1,63 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. 
+*/ +using iText.Kernel.Pdf.Tagutils; +using iText.Test; + +namespace iText.Pdfocr.Structuretree { + [NUnit.Framework.Category("UnitTest")] + public class LogicalStructureTreeItemTest : ExtendedITextTest { + [NUnit.Framework.Test] + public virtual void AddChildTest() { + LogicalStructureTreeItem parent = new LogicalStructureTreeItem(); + LogicalStructureTreeItem child1 = new LogicalStructureTreeItem(); + LogicalStructureTreeItem child2 = new LogicalStructureTreeItem(); + child1.AddChild(child2); + parent.AddChild(child1); + parent.AddChild(child2); + NUnit.Framework.Assert.AreEqual(2, parent.GetChildren().Count); + NUnit.Framework.Assert.AreEqual(0, child1.GetChildren().Count); + NUnit.Framework.Assert.AreEqual(parent, child1.GetParent()); + NUnit.Framework.Assert.AreEqual(parent, child2.GetParent()); + } + + [NUnit.Framework.Test] + public virtual void RemoveChildTest() { + LogicalStructureTreeItem parent = new LogicalStructureTreeItem(); + LogicalStructureTreeItem child1 = new LogicalStructureTreeItem(); + LogicalStructureTreeItem child2 = new LogicalStructureTreeItem(); + child1.AddChild(child2); + parent.AddChild(child1); + parent.AddChild(child2); + NUnit.Framework.Assert.IsTrue(parent.RemoveChild(child1)); + NUnit.Framework.Assert.IsFalse(parent.RemoveChild(child1)); + NUnit.Framework.Assert.AreEqual(1, parent.GetChildren().Count); + } + + [NUnit.Framework.Test] + public virtual void AccessibilityPropertiesTest() { + LogicalStructureTreeItem item = new LogicalStructureTreeItem().SetAccessibilityProperties(new DefaultAccessibilityProperties + ("Some role")); + NUnit.Framework.Assert.AreEqual("Some role", item.GetAccessibilityProperties().GetRole()); + } + } +} diff --git a/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/TableTreeStructureTest.cs b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/TableTreeStructureTest.cs new file mode 100644 index 0000000..3e61144 --- /dev/null +++ 
b/itext.tests/itext.pdfocr.api.tests/itext/pdfocr/structuretree/TableTreeStructureTest.cs @@ -0,0 +1,43 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. 
+*/ +using iText.Test; + +namespace iText.Pdfocr.Structuretree { + [NUnit.Framework.Category("UnitTest")] + public class TableTreeStructureTest : ExtendedITextTest { + [NUnit.Framework.Test] + public virtual void TableTreeTest() { + TableTreeItem table = new TableTreeItem().AddRow(new TableRowTreeItem().AddCell((TableCellTreeItem)new TableCellTreeItem + ().AddChild(new ParagraphTreeItem().AddChild(new SpanTreeItem()))).AddCell((TableCellTreeItem)new TableCellTreeItem + ().AddChild(new ParagraphTreeItem().AddChild(new SpanTreeItem())))).AddRow(new TableRowTreeItem().AddCell + ((TableCellTreeItem)new TableCellTreeItem().AddChild(new ParagraphTreeItem().AddChild(new SpanTreeItem + ()))).AddCell((TableCellTreeItem)new TableCellTreeItem().AddChild(new ParagraphTreeItem().AddChild(new + SpanTreeItem())))); + NUnit.Framework.Assert.AreEqual(2, table.GetChildren().Count); + NUnit.Framework.Assert.AreEqual(2, table.GetChildren()[0].GetChildren().Count); + NUnit.Framework.Assert.AreEqual(1, table.GetChildren()[0].GetChildren()[0].GetChildren().Count); + NUnit.Framework.Assert.AreEqual(1, table.GetChildren()[0].GetChildren()[0].GetChildren()[0].GetChildren(). 
+ Count); + } + } +} diff --git a/itext.tests/itext.pdfocr.api.tests/resources/itext/pdfocr/cmp_tableStructureTree.pdf b/itext.tests/itext.pdfocr.api.tests/resources/itext/pdfocr/cmp_tableStructureTree.pdf new file mode 100644 index 0000000..6cc8faa Binary files /dev/null and b/itext.tests/itext.pdfocr.api.tests/resources/itext/pdfocr/cmp_tableStructureTree.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs b/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs index 9b7d29f..3e21341 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/Properties/AssemblyInfo.cs @@ -7,7 +7,7 @@ [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("Apryse Group NV")] [assembly: AssemblyProduct("iText")] -[assembly: AssemblyCopyright ("Copyright (c) 1998-2023 Apryse Group NV")] +[assembly: AssemblyCopyright("Copyright (c) 1998-2024 Apryse Group NV")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] @@ -15,6 +15,6 @@ [assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")] -[assembly: AssemblyVersion("3.0.1.0")] -[assembly: AssemblyFileVersion("3.0.1.0")] -[assembly: AssemblyInformationalVersion("3.0.1")] +[assembly: AssemblyVersion("3.0.2.0")] +[assembly: AssemblyFileVersion("3.0.2.0")] +[assembly: AssemblyInformationalVersion("3.0.2")] diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj b/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj index d41bb1b..831a886 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext.pdfocr.tesseract4.tests.csproj @@ -26,9 +26,9 @@ - + - + diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs 
b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs index 8aa5a2b..c7a4493 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationEventHandlingTestHelper.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs index a2a67eb..59d313b 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/IntegrationTestHelper.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs index be21535..16f0783 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs index 5a65e76..64e9280 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs index 57f783f..2599336 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs index aca97e6..a26020b 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. 
-Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs index 1124167..5d14732 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/events/PdfOcrTesseract4ProductEventTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs index 74f1f41..0901e61 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/exceptions/PdfOcrTesseract4ExceptionTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationExecutableTest.cs index 9b07496..f9dce5a 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationLibTest.cs index 093f508..fcc6176 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs index ffddb99..eb96c00 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/general/BasicTesseractIntegrationTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. 
-Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.cs index 4687576..c5f7e86 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationLibTest.cs index 9ccba2f..c970fe1 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs index aa5d45d..d618cd9 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/imageformats/ImageFormatIntegrationTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.cs index 6dd553d..5f38227 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.cs index 2f87cb5..95f9105 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. 
This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs index 6391171..e204f6f 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdfa3u/PdfA3UIntegrationTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.cs index cca27e0..910e64e 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationLibTest.cs index 44c86c9..f3f8297 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. 
-Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs index ff03ff1..96ac6ad 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/pdflayers/PdfLayersIntegrationTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationExecutableTest.cs index 0743ce5..1bc1ed3 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs index 6f928b7..3c58c5a 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. @@ -84,10 +84,10 @@ public virtual void CompareInvoiceFrontThaiImage() { tesseractReader.SetTesseract4OcrEngineProperties(properties); DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList ("tha", "eng"), JavaUtil.ArraysAsList(NOTO_SANS_THAI_FONT_PATH, NOTO_SANS_FONT_PATH), DeviceRgb.RED); - bool javaTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY - , "diff_") == null; - bool dotNetTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY - , "diff_") == null; + bool javaTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava, GetTargetDirectory( + ), "diff_") == null; + bool dotNetTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory + (), "diff_") == null; NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest); } @@ -108,10 +108,10 @@ public virtual void CompareThaiTextImage() { tesseractReader.SetTesseract4OcrEngineProperties(properties); DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList ("tha"), JavaUtil.ArraysAsList(NOTO_SANS_THAI_FONT_PATH), DeviceRgb.RED); - bool javaTest = new 
CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY - , "diff_") == null; - bool dotNetTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY - , "diff_") == null; + bool javaTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathJava, GetTargetDirectory( + ), "diff_") == null; + bool dotNetTest = new CompareTool().CompareByContent(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory + (), "diff_") == null; NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest); } } diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs index 67d7d89..b40c7b0 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tessdata/TessDataIntegrationTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
@@ -290,8 +290,8 @@ public virtual void CompareMultiLangImage() { tesseractReader.SetTesseract4OcrEngineProperties(properties); DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList ("eng", "deu", "spa"), DeviceCmyk.BLACK); - NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(resultPdfPath, expectedPdfPath, TEST_DOCUMENTS_DIRECTORY - , "diff_")); + NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(resultPdfPath, expectedPdfPath, GetTargetDirectory + (), "diff_")); } finally { NUnit.Framework.Assert.AreEqual(TextPositioning.BY_WORDS, tesseractReader.GetTesseract4OcrEngineProperties diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs index a9d4410..b1f5638 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ApiTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationExecutableTest.cs index 732a7be..d39752f 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationLibTest.cs index 320f340..224a22f 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs index 72ff0bc..249bc6f 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImageIntegrationTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. @@ -93,10 +93,10 @@ public virtual void CompareRotatedImage() { // Because of difference of tesseract 5 and tesseract 4 there're some differences in text recognition. // So the goal of this test is to make text invisible and check if image is rotated. // Proper text recognition is compared in testHocrRotatedImage test by checking HOCR file. 
- bool javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY - , "diff_") == null; - bool dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY + bool javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, GetTargetDirectory() , "diff_") == null; + bool dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory + (), "diff_") == null; NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest); filename = "180_degrees_rotated"; expectedPdfPathJava = TEST_DOCUMENTS_DIRECTORY + filename + "_java.pdf"; @@ -104,10 +104,10 @@ public virtual void CompareRotatedImage() { resultPdfPath = GetTargetDirectory() + filename + "_" + testName + ".pdf"; DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList ("eng"), JavaUtil.ArraysAsList(NOTO_SANS_FONT_PATH), null, true); - javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY, + javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, GetTargetDirectory(), "diff_" + ) == null; + dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory(), "diff_") == null; - dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY - , "diff_") == null; NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest); filename = "270_degrees_rotated"; expectedPdfPathJava = TEST_DOCUMENTS_DIRECTORY + filename + "_java.pdf"; @@ -115,10 +115,10 @@ public virtual void CompareRotatedImage() { resultPdfPath = GetTargetDirectory() + filename + "_" + testName + ".pdf"; DoOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath, JavaUtil.ArraysAsList ("eng"), JavaUtil.ArraysAsList(NOTO_SANS_FONT_PATH), null, true); - javaTest 
= new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, TEST_DOCUMENTS_DIRECTORY, + javaTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathJava, GetTargetDirectory(), "diff_" + ) == null; + dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, GetTargetDirectory(), "diff_") == null; - dotNetTest = new CompareTool().CompareVisually(resultPdfPath, expectedPdfPathDotNet, TEST_DOCUMENTS_DIRECTORY - , "diff_") == null; NUnit.Framework.Assert.IsTrue(javaTest || dotNetTest); } } diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs index 9b7a83f..03efcff 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/ImagePreprocessingUtilTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs index 1474511..a0022c9 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelperTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs index 933486c..48684f7 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs index 2be9fed..3e5296d 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs index f8c848b..a2c33a3 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/Tesseract4MetaInfoEventHandlingTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperExecutableTest.cs index 7d1f7b3..3c05e98 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs index 927ce80..058d2e3 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. 
-Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs index f9d7f2a..8aa6101 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractHelperTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs index 7edbb95..56d42a2 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/TesseractOcrUtilTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsExecutableTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsExecutableTest.cs index 453cd95..ddc71cb 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsExecutableTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsExecutableTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. 
-Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsLibTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsLibTest.cs index 3746c69..92b578e 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsLibTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsLibTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs index 205741a..b42b87a 100644 --- a/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs +++ b/itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/tesseract4/UserWordsTest.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_executable.pdf index 5d706d0..0ec47a6 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_executable.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_lib.pdf index 004e5ef..be02c79 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/englishText_lib.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_executable.pdf index eaed8dd..0ed0207 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_executable.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_lib.pdf index 29d2871..43fcb43 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_01_lib.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_02.pdf 
b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_02.pdf index 9e1fba5..787908d 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_02.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/example_02.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_dotnet.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_dotnet.pdf index 011d403..1f06512 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_dotnet.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_dotnet.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_java.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_java.pdf index 616b588..eb6415a 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_java.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/invoice_front_thai_lib_java.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_executable.pdf index 4441b9e..98a56df 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_executable.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_lib.pdf 
b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_lib.pdf index 7fa0333..ade75fc 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multilang_lib.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_executable.pdf index 35b6538..78c055d 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_executable.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_lib.pdf index 1a726fe..f59e8e7 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/multipage_lib.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01.pdf index 882a45d..ef945e6 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_a3u.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_a3u.pdf index 7657a62..5faffc2 100644 Binary files 
a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_a3u.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_a3u.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_executable.pdf index 0bbd75b..2351b54 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_executable.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_lib.pdf index cafdbf5..0461509 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareJpe_lib.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_executable.pdf index f74a7df..695f84c 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_executable.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_lib.pdf index ea13a39..e12d776 100644 Binary 
files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_01_compareTif_lib.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_executable.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_executable.pdf index c86db65..3191b10 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_executable.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_executable.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_lib.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_lib.pdf index 867a2f5..dad14bf 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_lib.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/numbers_02_compareJpg_lib.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/spanish_01_a3u.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/spanish_01_a3u.pdf index 68c24cf..2cc35a4 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/spanish_01_a3u.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/spanish_01_a3u.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_dotnet.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_dotnet.pdf index 68f5d7e..c9c31de 100644 Binary files 
a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_dotnet.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_dotnet.pdf differ diff --git a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_java.pdf b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_java.pdf index b65d60c..1b27fe1 100644 Binary files a/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_java.pdf and b/itext.tests/itext.pdfocr.tesseract4.tests/resources/itext/pdfocr/documents/thai_01_lib_java.pdf differ diff --git a/itext/itext.pdfocr.api/PdfOcrExtensions.cs b/itext/itext.pdfocr.api/PdfOcrExtensions.cs index b4c44f4..dad4f06 100644 --- a/itext/itext.pdfocr.api/PdfOcrExtensions.cs +++ b/itext/itext.pdfocr.api/PdfOcrExtensions.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs b/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs index 5c64f22..2cd884e 100644 --- a/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs +++ b/itext/itext.pdfocr.api/Properties/AssemblyInfo.cs @@ -7,16 +7,16 @@ [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("Apryse Group NV")] [assembly: AssemblyProduct("iText")] -[assembly: AssemblyCopyright ("Copyright (c) 1998-2023 Apryse Group NV")] +[assembly: AssemblyCopyright("Copyright (c) 1998-2024 Apryse Group NV")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] [assembly: ComVisible(false)] [assembly: Guid("0c4ceb00-9a56-4547-a925-5974a85a6048")] -[assembly: AssemblyVersion("3.0.1.0")] -[assembly: AssemblyFileVersion("3.0.1.0")] -[assembly: AssemblyInformationalVersion("3.0.1")] +[assembly: AssemblyVersion("3.0.2.0")] +[assembly: AssemblyFileVersion("3.0.2.0")] +[assembly: AssemblyInformationalVersion("3.0.2")] [assembly: InternalsVisibleTo("itext.pdfocr.api.tests, PublicKey=" + "00240000048000009400000006020000002400005253413100040000010001008b21ed5b3fc1c1" + "1996390981fe22bbe71a39a9e11d3c2cefddd6ee92920fa871f9666ae0fa941af0280d0653df04" + diff --git a/itext/itext.pdfocr.api/itext.pdfocr.api.csproj b/itext/itext.pdfocr.api/itext.pdfocr.api.csproj index 7d3ba8d..6e1943e 100644 --- a/itext/itext.pdfocr.api/itext.pdfocr.api.csproj +++ b/itext/itext.pdfocr.api/itext.pdfocr.api.csproj @@ -30,7 +30,7 @@ - + diff --git a/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs b/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs index b8dae29..73667ec 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/AbstractPdfOcrEventHelper.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. 
This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IImageRotationHandler.cs b/itext/itext.pdfocr.api/itext/pdfocr/IImageRotationHandler.cs index 94e176b..18bac1b 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/IImageRotationHandler.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/IImageRotationHandler.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs b/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs index 15b0f2a..dc5375f 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/IOcrEngine.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IOcrProcessProperties.cs b/itext/itext.pdfocr.api/itext/pdfocr/IOcrProcessProperties.cs new file mode 100644 index 0000000..8a68583 --- /dev/null +++ b/itext/itext.pdfocr.api/itext/pdfocr/IOcrProcessProperties.cs @@ -0,0 +1,30 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +namespace iText.Pdfocr { + /// + /// OCR properties passed to the OCR engine as part of + /// . + /// + public interface IOcrProcessProperties { + } +} diff --git a/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs b/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs index 282723c..1becbb2 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/IProductAware.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs index 78544e5..939c5cb 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrEngineProperties.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs index 727e395..2d2dbee 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreator.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. 
-Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. @@ -36,6 +36,7 @@ You should have received a copy of the GNU Affero General Public License using iText.Kernel.Pdf; using iText.Kernel.Pdf.Canvas; using iText.Kernel.Pdf.Layer; +using iText.Kernel.Pdf.Tagutils; using iText.Layout; using iText.Layout.Element; using iText.Layout.Font; @@ -44,6 +45,7 @@ You should have received a copy of the GNU Affero General Public License using iText.Pdfocr.Exceptions; using iText.Pdfocr.Logs; using iText.Pdfocr.Statistics; +using iText.Pdfocr.Structuretree; namespace iText.Pdfocr { /// @@ -193,13 +195,17 @@ public void SetOcrPdfCreatorProperties(OcrPdfCreatorProperties ocrPdfCreatorProp /// /// for PDF/A-3u document /// + /// + /// extra OCR process properties passed to + /// + /// /// /// result PDF/A-3u /// /// object /// public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties - , PdfOutputIntent pdfOutputIntent) { + , PdfOutputIntent pdfOutputIntent, IOcrProcessProperties ocrProcessProperties) { LOGGER.LogInformation(MessageFormatUtil.Format(PdfOcrLogMessageConstant.START_OCR_FOR_IMAGES, inputImages. 
Count)); // create event helper @@ -207,6 +213,7 @@ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, OcrPdfCreatorEventHelper ocrEventHelper = new OcrPdfCreatorEventHelper(pdfSequenceId, ocrPdfCreatorProperties .GetMetaInfo()); OcrProcessContext ocrProcessContext = new OcrProcessContext(ocrEventHelper); + ocrProcessContext.SetOcrProcessProperties(ocrProcessProperties); // map contains: // keys: image files // values: @@ -277,6 +284,113 @@ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, return CreatePdfA(inputImages, pdfWriter, new DocumentProperties(), pdfOutputIntent); } + /// + /// Performs OCR with set parameters using provided + /// + /// and + /// creates PDF using provided + /// + /// , + /// + /// and + /// . + /// + /// + /// Performs OCR with set parameters using provided + /// + /// and + /// creates PDF using provided + /// + /// , + /// + /// and + /// + /// . PDF/A-3u document will be created if + /// provided + /// + /// is not null. + /// + /// NOTE that after executing this method you will have a product event from + /// the both itextcore and pdfOcr. Therefore, use this method only if you need to work + /// with the generated + /// + /// . If you don't need this, use the + /// + /// method. In this case, only the pdfOcr event will be dispatched. + /// + /// + /// + /// + /// of images to be OCRed + /// + /// + /// the + /// + /// object + /// to write final PDF document to + /// + /// document properties + /// + /// + /// + /// for PDF/A-3u document + /// + /// + /// result PDF/A-3u + /// + /// object + /// + public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties + , PdfOutputIntent pdfOutputIntent) { + return CreatePdfA(inputImages, pdfWriter, documentProperties, pdfOutputIntent, null); + } + + /// + /// Performs OCR with set parameters using provided + /// + /// and + /// creates PDF using provided + /// . 
+ /// + /// + /// Performs OCR with set parameters using provided + /// + /// and + /// creates PDF using provided + /// . + /// + /// NOTE that after executing this method you will have a product event from + /// the both itextcore and pdfOcr. Therefore, use this method only if you need to work + /// with the generated + /// + /// . If you don't need this, use the + /// + /// method. In this case, only the pdfOcr event will be dispatched. + /// + /// + /// + /// + /// of images to be OCRed + /// + /// + /// the + /// + /// object + /// to write final PDF document to + /// + /// document properties + /// extra OCR process properties passed to OcrProcessContext + /// + /// result + /// + /// object + /// + public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties + , IOcrProcessProperties ocrProcessProperties) { + return CreatePdfA(inputImages, pdfWriter, documentProperties, null, ocrProcessProperties); + } + /// /// Performs OCR with set parameters using provided /// @@ -318,7 +432,7 @@ public PdfDocument CreatePdfA(IList inputImages, PdfWriter pdfWriter, /// public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter, DocumentProperties documentProperties ) { - return CreatePdfA(inputImages, pdfWriter, documentProperties, null); + return CreatePdfA(inputImages, pdfWriter, documentProperties, null, null); } /// @@ -360,7 +474,7 @@ public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter, D /// object /// public PdfDocument CreatePdf(IList inputImages, PdfWriter pdfWriter) { - return CreatePdfA(inputImages, pdfWriter, new DocumentProperties(), null); + return CreatePdfA(inputImages, pdfWriter, new DocumentProperties(), null, null); } /// @@ -510,7 +624,25 @@ private void AddToCanvas(PdfDocument pdfDocument, Rectangle imageSize, IList flatLogicalTree = new Dictionary(); + if (ocrPdfCreatorProperties.IsTagged()) { + // Logical tree, a list of top items, children can be retrieved out of them + IList 
logicalTree = new List(); + // A map of leaf LogicalStructureTreeItem's to TextInfo's attached to these leaves + IDictionary> leavesTextInfos = new Dictionary>(); + bool taggedSupported = GetLogicalTree(pageText, logicalTree, leavesTextInfos); + if (!taggedSupported) { + throw new PdfOcrException(PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED); + } + pdfDocument.SetTagged(); + // Create a map of TextInfo to tag pointers meanwhile creating the required tags. + // Tag pointers are later used to put all the required info into canvas (content stream) + BuildLogicalTreeAndFlatten(logicalTree, leavesTextInfos, new TagTreePointer(pdfDocument).SetPageForTagging + (pdfPage), flatLogicalTree); + } + AddTextToCanvas(imageSize, pageText, flatLogicalTree, canvas, multiplier, pdfPage); } catch (PdfOcrException e) { LOGGER.LogError(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, e.Message @@ -614,6 +746,9 @@ private void AddDataToPdfDocument(IDictionarycanvas to place the image private void AddImageToCanvas(ImageData imageData, Rectangle imageSize, PdfCanvas pdfCanvas) { if (imageData != null) { + if (ocrPdfCreatorProperties.IsTagged()) { + pdfCanvas.OpenTag(new CanvasArtifact()); + } if (ocrPdfCreatorProperties.GetPageSize() == null) { pdfCanvas.AddImageFittedIntoRectangle(imageData, imageSize, false); } @@ -624,6 +759,84 @@ private void AddImageToCanvas(ImageData imageData, Rectangle imageSize, PdfCanva .GetHeight()); pdfCanvas.AddImageFittedIntoRectangle(imageData, rect, false); } + if (ocrPdfCreatorProperties.IsTagged()) { + pdfCanvas.CloseTag(); + } + } + } + + /// + /// + /// + /// if tagging supported by the engine. + /// + [System.ObsoleteAttribute(@"In next major version we need to add boolean taggingSupported() method into IOcrEngine and throw exception in OcrPdfCreator constructor if taggingSupported() returns false but OcrPdfCreatorProperties.getTagged returns true." 
+ )] + private static bool GetLogicalTree(IList textInfos, IList logicalStructureTreeItems + , IDictionary> leavesTextInfos) { + bool taggedSupported = false; + if (textInfos == null) { + return taggedSupported; + } + foreach (TextInfo textInfo in textInfos) { + LogicalStructureTreeItem structTreeItem = textInfo.GetLogicalStructureTreeItem(); + LogicalStructureTreeItem topParent; + if (structTreeItem is ArtifactItem) { + continue; + } + else { + if (structTreeItem != null) { + topParent = GetTopParent(structTreeItem); + taggedSupported = true; + } + else { + structTreeItem = new LogicalStructureTreeItem(); + textInfo.SetLogicalStructureTreeItem(structTreeItem); + topParent = structTreeItem; + } + } + IList textInfosPerStructItem = leavesTextInfos.Get(structTreeItem); + if (textInfosPerStructItem == null) { + textInfosPerStructItem = new List(); + textInfosPerStructItem.Add(textInfo); + leavesTextInfos.Put(structTreeItem, textInfosPerStructItem); + } + else { + textInfosPerStructItem.Add(textInfo); + } + if (!logicalStructureTreeItems.Contains(topParent)) { + logicalStructureTreeItems.Add(topParent); + } + } + return taggedSupported; + } + + private static LogicalStructureTreeItem GetTopParent(LogicalStructureTreeItem structInfo) { + if (structInfo.GetParent() != null) { + return GetTopParent(structInfo.GetParent()); + } + else { + return structInfo; + } + } + + private void BuildLogicalTreeAndFlatten(IList logicalStructureTreeItems, IDictionary + > leavesTextInfos, TagTreePointer tagPointer, IDictionary flatLogicalTree) { + foreach (LogicalStructureTreeItem structTreeItem in logicalStructureTreeItems) { + AccessibilityProperties accessibilityProperties = structTreeItem.GetAccessibilityProperties(); + if (accessibilityProperties == null) { + accessibilityProperties = new DefaultAccessibilityProperties(PdfName.Span.GetValue()); + } + tagPointer.AddTag(accessibilityProperties); + IList textItems = leavesTextInfos.Get(structTreeItem); + if (textItems != null) { + 
foreach (TextInfo item in textItems) { + flatLogicalTree.Put(item, new TagTreePointer(tagPointer)); + } + } + BuildLogicalTreeAndFlatten(structTreeItem.GetChildren(), leavesTextInfos, tagPointer, flatLogicalTree); + tagPointer.MoveToParent(); } } @@ -633,45 +846,61 @@ private void AddImageToCanvas(ImageData imageData, Rectangle imageSize, PdfCanva /// /// /// text that was found on this image (or on this page) + /// a map of TextInfo to a tag pointer /// canvas to place the text /// coefficient to adjust text placing on canvas - /// page parameters - private void AddTextToCanvas(Rectangle imageSize, IList pageText, PdfCanvas pdfCanvas, float multiplier - , Rectangle pageMediaBox) { - if (pageText != null && pageText.Count > 0) { - Point imageCoordinates = PdfCreatorUtil.CalculateImageCoordinates(ocrPdfCreatorProperties.GetPageSize(), imageSize - ); - foreach (TextInfo item in pageText) { - String line = item.GetText(); - float bboxWidthPt = GetWidthPt(item, multiplier); - float bboxHeightPt = GetHeightPt(item, multiplier); - FontProvider fontProvider = GetOcrPdfCreatorProperties().GetFontProvider(); - String fontFamily = GetOcrPdfCreatorProperties().GetDefaultFontFamily(); - if (LineNotEmpty(line, bboxHeightPt, bboxWidthPt)) { - Document document = new Document(pdfCanvas.GetDocument()); - document.SetFontProvider(fontProvider); - // Scale the text width to fit the OCR bbox - float fontSize = PdfCreatorUtil.CalculateFontSize(document, line, fontFamily, bboxHeightPt, bboxWidthPt); - float lineWidth = PdfCreatorUtil.GetRealLineWidth(document, line, fontFamily, fontSize); - float xOffset = GetXOffsetPt(item, multiplier); - float yOffset = GetYOffsetPt(item, multiplier, imageSize); - iText.Layout.Canvas canvas = new iText.Layout.Canvas(pdfCanvas, pageMediaBox); - canvas.SetFontProvider(fontProvider); - Text text = new Text(line).SetHorizontalScaling(bboxWidthPt / lineWidth); - Paragraph paragraph = new Paragraph(text).SetMargin(0).SetMultipliedLeading(1.2f); - 
paragraph.SetFontFamily(fontFamily).SetFontSize(fontSize); - paragraph.SetWidth(bboxWidthPt * 1.5f); - if (ocrPdfCreatorProperties.GetTextColor() != null) { - paragraph.SetFontColor(ocrPdfCreatorProperties.GetTextColor()); - } - else { - paragraph.SetTextRenderingMode(PdfCanvasConstants.TextRenderingMode.INVISIBLE); - } - canvas.ShowTextAligned(paragraph, xOffset + (float)imageCoordinates.x, yOffset + (float)imageCoordinates.y - , TextAlignment.LEFT); - canvas.Close(); + /// current page + private void AddTextToCanvas(Rectangle imageSize, IList pageText, IDictionary flatLogicalTree, PdfCanvas pdfCanvas, float multiplier, PdfPage page) { + if (pageText == null || pageText.Count == 0) { + return; + } + Rectangle pageMediaBox = page.GetMediaBox(); + Point imageCoordinates = PdfCreatorUtil.CalculateImageCoordinates(ocrPdfCreatorProperties.GetPageSize(), imageSize + ); + foreach (TextInfo item in pageText) { + float bboxWidthPt = GetWidthPt(item, multiplier); + float bboxHeightPt = GetHeightPt(item, multiplier); + FontProvider fontProvider = GetOcrPdfCreatorProperties().GetFontProvider(); + String fontFamily = GetOcrPdfCreatorProperties().GetDefaultFontFamily(); + String line = item.GetText(); + if (!LineNotEmpty(line, bboxHeightPt, bboxWidthPt)) { + continue; + } + Document document = new Document(pdfCanvas.GetDocument()); + document.SetFontProvider(fontProvider); + // Scale the text width to fit the OCR bbox + float fontSize = PdfCreatorUtil.CalculateFontSize(document, line, fontFamily, bboxHeightPt, bboxWidthPt); + float lineWidth = PdfCreatorUtil.GetRealLineWidth(document, line, fontFamily, fontSize); + float xOffset = GetXOffsetPt(item, multiplier); + float yOffset = GetYOffsetPt(item, multiplier, imageSize); + TagTreePointer tagPointer = flatLogicalTree.Get(item); + if (tagPointer != null) { + pdfCanvas.OpenTag(tagPointer.GetTagReference()); + } + else { + if (ocrPdfCreatorProperties.IsTagged()) { + pdfCanvas.OpenTag(new CanvasArtifact()); } } + 
iText.Layout.Canvas canvas = new iText.Layout.Canvas(pdfCanvas, pageMediaBox); + canvas.SetFontProvider(fontProvider); + Text text = new Text(line).SetHorizontalScaling(bboxWidthPt / lineWidth); + Paragraph paragraph = new Paragraph(text).SetMargin(0); + paragraph.SetFontFamily(fontFamily).SetFontSize(fontSize); + paragraph.SetWidth(bboxWidthPt * 1.5f); + if (ocrPdfCreatorProperties.GetTextColor() != null) { + paragraph.SetFontColor(ocrPdfCreatorProperties.GetTextColor()); + } + else { + paragraph.SetTextRenderingMode(PdfCanvasConstants.TextRenderingMode.INVISIBLE); + } + canvas.ShowTextAligned(paragraph, xOffset + (float)imageCoordinates.x, yOffset + (float)imageCoordinates.y + , TextAlignment.LEFT); + if (ocrPdfCreatorProperties.IsTagged()) { + pdfCanvas.CloseTag(); + } + canvas.Close(); } } diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs index dd31095..f5dec9b 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorEventHelper.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs index 381a0c1..75482a5 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrPdfCreatorProperties.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
@@ -116,6 +116,9 @@ public class OcrPdfCreatorProperties { private IMetaInfo metaInfo; + /// Indicates whether the created pdf is tagged or not. + private bool tagged = false; + /// /// Creates a new /// @@ -469,6 +472,36 @@ public virtual iText.Pdfocr.OcrPdfCreatorProperties SetImageRotationHandler(IIma return this; } + /// Defines whether pdf document should be tagged or not. + /// + /// + /// + /// if the result pdf is expected to be tagged, + /// + /// otherwise. + /// + /// + /// this + /// + /// instance. + /// + public virtual iText.Pdfocr.OcrPdfCreatorProperties SetTagged(bool tagged) { + this.tagged = tagged; + return this; + } + + /// Retrieve information on whether pdf document should be tagged or not. + /// + /// + /// + /// if the result pdf is expected to be tagged, + /// + /// otherwise. + /// + public virtual bool IsTagged() { + return tagged; + } + /// /// Set meta info for this /// . diff --git a/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs b/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs index 336d23d..052fe89 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/OcrProcessContext.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
@@ -25,6 +25,8 @@ namespace iText.Pdfocr { public class OcrProcessContext { private AbstractPdfOcrEventHelper ocrEventHelper; + private IOcrProcessProperties ocrProcessProperties; + /// Creates an instance of ocr process context /// helper class for working with events public OcrProcessContext(AbstractPdfOcrEventHelper eventHelper) { @@ -45,5 +47,17 @@ public virtual AbstractPdfOcrEventHelper GetOcrEventHelper() { public virtual void SetOcrEventHelper(AbstractPdfOcrEventHelper eventHelper) { this.ocrEventHelper = eventHelper; } + + /// Set extra OCR process properties. + /// extra OCR process properties. + internal virtual void SetOcrProcessProperties(IOcrProcessProperties ocrProcessProperties) { + this.ocrProcessProperties = ocrProcessProperties; + } + + /// Get extra OCR process properties. + /// extra OCR process properties. + public virtual IOcrProcessProperties GetOcrProcessProperties() { + return ocrProcessProperties; + } } } diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs b/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs index a25101a..00f7448 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/PdfCreatorUtil.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
@@ -69,6 +69,7 @@ internal static float CalculateFontSize(Document document, String line, String f float maxFontSize = bbox.GetHeight(); try { Paragraph paragraph = new Paragraph(line); + paragraph.SetMargin(0); paragraph.SetWidth(bbox.GetWidth()); paragraph.SetFontFamily(fontFamily); while (Math.Abs(fontSize - maxFontSize) > 1e-1) { diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs index c9f3fa1..0073ac2 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrFontProvider.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs index 8ba277d..6bdfa92 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/PdfOcrMetaInfoContainer.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/ScaleMode.cs b/itext/itext.pdfocr.api/itext/pdfocr/ScaleMode.cs index 5e93ad7..9d5a7cf 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/ScaleMode.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/ScaleMode.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs b/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs index f74bc06..4f94fa6 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/TextInfo.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. @@ -22,6 +22,7 @@ You should have received a copy of the GNU Affero General Public License */ using System; using iText.Kernel.Geom; +using iText.Pdfocr.Structuretree; namespace iText.Pdfocr { /// @@ -38,6 +39,13 @@ public class TextInfo { /// private Rectangle bboxRect; + /// + /// If LogicalStructureTreeItem is set, then + /// + /// s are expected to be in logical order. + /// + private LogicalStructureTreeItem logicalStructureTreeItem; + /// /// Creates a new /// @@ -104,5 +112,27 @@ public virtual Rectangle GetBboxRect() { public virtual void SetBboxRect(Rectangle bbox) { this.bboxRect = new Rectangle(bbox); } + + /// Retrieves structure tree item for the text item. + /// structure tree item. + public virtual LogicalStructureTreeItem GetLogicalStructureTreeItem() { + return logicalStructureTreeItem; + } + + /// Sets logical structure tree parent item for the text info. + /// + /// Sets logical structure tree parent item for the text info. It allows to organize text chunks + /// into logical hierarchy, e.g. specify document paragraphs, tables, etc. + /// + /// If LogicalStructureTreeItem is set, then the list of + /// + /// s in + /// + /// return value is expected to be in logical order. + /// + /// structure tree item. 
+ public virtual void SetLogicalStructureTreeItem(LogicalStructureTreeItem logicalStructureTreeItem) { + this.logicalStructureTreeItem = logicalStructureTreeItem; + } } } diff --git a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs index 0e7712f..b43b8d9 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrException.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs index 1f585f3..3ac996d 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrExceptionMessageConstant.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
@@ -35,6 +35,8 @@ public class PdfOcrExceptionMessageConstant { public const String STATISTICS_EVENT_TYPE_IS_NOT_DETECTED = "Statistics event type is not detected."; + public const String TAGGING_IS_NOT_SUPPORTED = "Tagging is not supported by the OCR engine."; + private PdfOcrExceptionMessageConstant() { } //Private constructor will prevent the instantiation of this class directly diff --git a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs index d5544b9..0ec8afe 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/exceptions/PdfOcrInputException.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs b/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs index 46a2193..ed409dc 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/logs/PdfOcrLogMessageConstant.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs index cd1f617..cd519e1 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputType.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. 
-Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs index 19d45bd..3b2e0dc 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs index 6c2d176..bc5f621 100644 --- a/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs +++ b/itext/itext.pdfocr.api/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ArtifactItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ArtifactItem.cs new file mode 100644 index 0000000..e0f1b7c --- /dev/null +++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ArtifactItem.cs @@ -0,0 +1,49 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. 
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +namespace iText.Pdfocr.Structuretree { + /// This class represents artifact structure tree item. + /// + /// This class represents artifact structure tree item. Attaching such item to the text info means that + /// the text will be marked as artifact. + /// + public sealed class ArtifactItem : LogicalStructureTreeItem { + private static readonly iText.Pdfocr.Structuretree.ArtifactItem ARTIFACT_INSTANCE = new iText.Pdfocr.Structuretree.ArtifactItem + (); + + private ArtifactItem() + : base() { + } + + /// + /// Retrieve an instance of + /// . + /// + /// + /// an instance of + /// . + /// + public static iText.Pdfocr.Structuretree.ArtifactItem GetInstance() { + return ARTIFACT_INSTANCE; + } + } +} diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/LogicalStructureTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/LogicalStructureTreeItem.cs new file mode 100644 index 0000000..cfd0156 --- /dev/null +++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/LogicalStructureTreeItem.cs @@ -0,0 +1,125 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. 
+For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +using System.Collections.Generic; +using iText.Kernel.Pdf.Tagutils; + +namespace iText.Pdfocr.Structuretree { + /// This class represents structure tree item of the text item put into the pdf document. + /// + /// This class represents structure tree item of the text item put into the pdf document. + /// See + /// . + /// + public class LogicalStructureTreeItem { + private AccessibilityProperties accessibilityProperties; + + private IList children = new List(); + + private iText.Pdfocr.Structuretree.LogicalStructureTreeItem parent; + + /// + /// Instantiate a new + /// + /// instance. + /// + public LogicalStructureTreeItem() + : this(null) { + } + + /// + /// Instantiate a new + /// + /// instance. + /// + /// properties to define and describe pdf structure elements. + public LogicalStructureTreeItem(AccessibilityProperties accessibilityProperties) { + this.accessibilityProperties = accessibilityProperties; + } + + /// Retrieve structure tree element's properties. + /// structure tree element's properties. + public virtual AccessibilityProperties GetAccessibilityProperties() { + return accessibilityProperties; + } + + /// Set structure tree element's properties. + /// structure tree element's properties. 
+ /// + /// this + /// + /// instance. + /// + public virtual iText.Pdfocr.Structuretree.LogicalStructureTreeItem SetAccessibilityProperties(AccessibilityProperties + accessibilityProperties) { + this.accessibilityProperties = accessibilityProperties; + return this; + } + + /// Retrieve parent structure tree item. + /// parent structure tree item. + public virtual iText.Pdfocr.Structuretree.LogicalStructureTreeItem GetParent() { + return parent; + } + + /// Add child structure tree item. + /// child structure tree item. + /// + /// this + /// + /// instance. + /// + public virtual iText.Pdfocr.Structuretree.LogicalStructureTreeItem AddChild(iText.Pdfocr.Structuretree.LogicalStructureTreeItem + child) { + children.Add(child); + if (child.GetParent() != null) { + child.GetParent().RemoveChild(child); + } + child.parent = this; + return this; + } + + /// Remove child structure tree item. + /// child structure tree item. + /// + /// + /// + /// if the child was removed, + /// + /// otherwise. + /// + public virtual bool RemoveChild(iText.Pdfocr.Structuretree.LogicalStructureTreeItem child) { + if (children.Remove(child)) { + child.parent = null; + return true; + } + return false; + } + + /// Retrieve all child structure tree items. + /// all child structure tree items. + public virtual IList GetChildren() { + return children; + } + } +} diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ParagraphTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ParagraphTreeItem.cs new file mode 100644 index 0000000..10e1b1d --- /dev/null +++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/ParagraphTreeItem.cs @@ -0,0 +1,38 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. 
+ +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +using iText.Kernel.Pdf.Tagging; +using iText.Kernel.Pdf.Tagutils; + +namespace iText.Pdfocr.Structuretree { + /// A convenience class to associate certain text items with the paragraph structure item. + public class ParagraphTreeItem : LogicalStructureTreeItem { + /// + /// Instantiate a new + /// + /// instance. + /// + public ParagraphTreeItem() + : base(new DefaultAccessibilityProperties(StandardRoles.P)) { + } + } +} diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/SpanTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/SpanTreeItem.cs new file mode 100644 index 0000000..6473696 --- /dev/null +++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/SpanTreeItem.cs @@ -0,0 +1,38 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +using iText.Kernel.Pdf.Tagging; +using iText.Kernel.Pdf.Tagutils; + +namespace iText.Pdfocr.Structuretree { + /// A convenience class to associate certain text items with the span structure item. + public class SpanTreeItem : LogicalStructureTreeItem { + /// + /// Instantiate a new + /// + /// instance. + /// + public SpanTreeItem() + : base(new DefaultAccessibilityProperties(StandardRoles.SPAN)) { + } + } +} diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableCellTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableCellTreeItem.cs new file mode 100644 index 0000000..ef022a1 --- /dev/null +++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableCellTreeItem.cs @@ -0,0 +1,38 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. 
If not, see . +*/ +using iText.Kernel.Pdf.Tagging; +using iText.Kernel.Pdf.Tagutils; + +namespace iText.Pdfocr.Structuretree { + /// A convenience class to associate certain text items with the table cell structure item. + public class TableCellTreeItem : LogicalStructureTreeItem { + /// + /// Instantiate a new + /// + /// instance. + /// + public TableCellTreeItem() + : base(new DefaultAccessibilityProperties(StandardRoles.TD)) { + } + } +} diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableRowTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableRowTreeItem.cs new file mode 100644 index 0000000..8580a83 --- /dev/null +++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableRowTreeItem.cs @@ -0,0 +1,50 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +using iText.Kernel.Pdf.Tagging; +using iText.Kernel.Pdf.Tagutils; + +namespace iText.Pdfocr.Structuretree { + /// A convenience class to associate certain text items with the table row structure item. + public class TableRowTreeItem : LogicalStructureTreeItem { + /// + /// Instantiate a new + /// + /// instance. 
+ /// + public TableRowTreeItem() + : base(new DefaultAccessibilityProperties(StandardRoles.TR)) { + } + + /// Add a new table cell structure tree item to the table row. + /// table cell structure tree item to be added. + /// + /// this + /// + /// instance. + /// + public virtual iText.Pdfocr.Structuretree.TableRowTreeItem AddCell(TableCellTreeItem cellItem) { + AddChild(cellItem); + return this; + } + } +} diff --git a/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableTreeItem.cs b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableTreeItem.cs new file mode 100644 index 0000000..1887871 --- /dev/null +++ b/itext/itext.pdfocr.api/itext/pdfocr/structuretree/TableTreeItem.cs @@ -0,0 +1,50 @@ +/* +This file is part of the iText (R) project. +Copyright (c) 1998-2024 Apryse Group NV +Authors: Apryse Software. + +This program is offered under a commercial and under the AGPL license. +For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below. + +AGPL licensing: +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +using iText.Kernel.Pdf.Tagging; +using iText.Kernel.Pdf.Tagutils; + +namespace iText.Pdfocr.Structuretree { + /// A convenience class to associate certain text items with the table structure item. + public class TableTreeItem : LogicalStructureTreeItem { + /// + /// Instantiate a new + /// + /// instance. 
+ /// + public TableTreeItem() + : base(new DefaultAccessibilityProperties(StandardRoles.TABLE)) { + } + + /// Add a new row structure tree item to the table. + /// row structure tree item to be added. + /// + /// this + /// + /// instance. + /// + public virtual iText.Pdfocr.Structuretree.TableTreeItem AddRow(TableRowTreeItem rowItem) { + AddChild(rowItem); + return this; + } + } +} diff --git a/itext/itext.pdfocr.api/pdfocr-api.nuspec b/itext/itext.pdfocr.api/pdfocr-api.nuspec index 9052bef..0d06e07 100644 --- a/itext/itext.pdfocr.api/pdfocr-api.nuspec +++ b/itext/itext.pdfocr.api/pdfocr-api.nuspec @@ -2,7 +2,7 @@ itext.pdfocr.api - 3.0.1 + 3.0.2 iText pdfOcr Apryse Software Apryse Software @@ -12,13 +12,13 @@ true pdfOCR is an iText add-on to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving pdfOCR is an iText add-on for C# (.NET) to recognize and extract text in scanned documents and images - Copyright (c) 1998-2023 Apryse Group NV + Copyright (c) 1998-2024 Apryse Group NV en-US OCR PDF ligatures text glyphs iText Optical Character Recognition PDF/A ISO-compliant Tesseract open-source opensource English Mandarin Chinese Hindi Spanish French Arabic Bengali Russian Portuguese Indonesian scan image extractable data searchable diacritic sdk c# .net - + diff --git a/itext/itext.pdfocr.tesseract4/PdfOcrTesseract4Extensions.cs b/itext/itext.pdfocr.tesseract4/PdfOcrTesseract4Extensions.cs index 5c52b7c..a7d6f31 100644 --- a/itext/itext.pdfocr.tesseract4/PdfOcrTesseract4Extensions.cs +++ b/itext/itext.pdfocr.tesseract4/PdfOcrTesseract4Extensions.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs b/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs index 3cd1be2..497e6c9 100644 --- a/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs +++ b/itext/itext.pdfocr.tesseract4/Properties/AssemblyInfo.cs @@ -7,16 +7,16 @@ [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("Apryse Group NV")] [assembly: AssemblyProduct("iText")] -[assembly: AssemblyCopyright ("Copyright (c) 1998-2023 Apryse Group NV")] +[assembly: AssemblyCopyright("Copyright (c) 1998-2024 Apryse Group NV")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] [assembly: ComVisible(false)] [assembly: Guid("0c4ceb00-9a56-4547-a925-5974a85a6048")] -[assembly: AssemblyVersion("3.0.1.0")] -[assembly: AssemblyFileVersion("3.0.1.0")] -[assembly: AssemblyInformationalVersion("3.0.1")] +[assembly: AssemblyVersion("3.0.2.0")] +[assembly: AssemblyFileVersion("3.0.2.0")] +[assembly: AssemblyInformationalVersion("3.0.2")] [assembly: InternalsVisibleTo("itext.pdfocr.tesseract4.tests, PublicKey=" + "00240000048000009400000006020000002400005253413100040000010001008b21ed5b3fc1c1" + "1996390981fe22bbe71a39a9e11d3c2cefddd6ee92920fa871f9666ae0fa941af0280d0653df04" + @@ -24,4 +24,4 @@ "009746bbdafcb75bcdbcecb7caf1f0f4b6e7d013906ba60b66eb1c8298e4efb052caf6cece4bf1" + "816902cc")] -[assembly: Versions.Attributes.KernelVersion("8.0.2.0")] +[assembly: Versions.Attributes.KernelVersion("8.0.3.0")] diff --git a/itext/itext.pdfocr.tesseract4/Properties/KernelVersionAttribute.cs b/itext/itext.pdfocr.tesseract4/Properties/KernelVersionAttribute.cs index 58d560d..96767ba 100644 --- a/itext/itext.pdfocr.tesseract4/Properties/KernelVersionAttribute.cs +++ b/itext/itext.pdfocr.tesseract4/Properties/KernelVersionAttribute.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. 
This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs index cc05a20..ad6ad60 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingOptions.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingOptions.cs index ed5f177..4d99864 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingOptions.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingOptions.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs index 8eddc44..1ba18d5 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/ImagePreprocessingUtil.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/LeptonicaImageRotationHandler.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/LeptonicaImageRotationHandler.cs index 90e8a86..c94ab5c 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/LeptonicaImageRotationHandler.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/LeptonicaImageRotationHandler.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/OutputFormat.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/OutputFormat.cs index 3cdcb90..b42e5a8 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/OutputFormat.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/OutputFormat.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs index 09b2b14..ac9a871 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4EventHelper.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs index c34b05f..59d052f 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs index 8251f32..b15802a 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4FileResultEventHelper.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs index b59df4f..0f66c02 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4LibOcrEngine.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs index b500968..2c69b19 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4MetaInfo.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs index d874608..b74acff 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/Tesseract4OcrEngineProperties.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs index 4870bfa..5002325 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractHelper.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs index 8bf5ae3..4969cdf 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TesseractOcrUtil.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV + Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TextPositioning.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TextPositioning.cs index 44f5afe..15a12a3 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TextPositioning.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/TextPositioning.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs index f6fc6a0..68c0304 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. 
@@ -34,11 +34,11 @@ public class PdfOcrTesseract4ProductData { private const String PDF_OCR_TESSERACT4_PUBLIC_PRODUCT_NAME = "pdfOCR-Tesseract4"; - private const String PDF_OCR_VERSION = "3.0.1"; + private const String PDF_OCR_VERSION = "3.0.2"; private const int PDF_OCR_COPYRIGHT_SINCE = 2000; - private const int PDF_OCR_COPYRIGHT_TO = 2023; + private const int PDF_OCR_COPYRIGHT_TO = 2024; private static readonly ProductData PDF_OCR_PRODUCT_DATA = new ProductData(PDF_OCR_TESSERACT4_PUBLIC_PRODUCT_NAME , PDF_OCR_TESSERACT4_PRODUCT_NAME, PDF_OCR_VERSION, PDF_OCR_COPYRIGHT_SINCE, PDF_OCR_COPYRIGHT_TO); diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs index 6e1826a..607cdfe 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/actions/events/PdfOcrTesseract4ProductEvent.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs index ff66562..2c8b0dc 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrInputTesseract4Exception.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. 
This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs index ccfc0e4..8f05202 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4Exception.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs index 760a20b..2825801 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/exceptions/PdfOcrTesseract4ExceptionMessageConstant.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. -Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs index 04bc1d3..61e3699 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/logs/Tesseract4LogMessageConstant.cs @@ -1,6 +1,6 @@ /* This file is part of the iText (R) project. 
-Copyright (c) 1998-2023 Apryse Group NV +Copyright (c) 1998-2024 Apryse Group NV Authors: Apryse Software. This program is offered under a commercial and under the AGPL license. diff --git a/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec b/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec index c8e532e..f9e829b 100644 --- a/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec +++ b/itext/itext.pdfocr.tesseract4/pdfocr-tesseract4.nuspec @@ -2,7 +2,7 @@ itext.pdfocr.tesseract4 - 3.0.1 + 3.0.2 iText pdfOcr Apryse Software Apryse Software @@ -12,13 +12,13 @@ true pdfOCR is an iText add-on to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving pdfOCR is an iText add-on for C# (.NET) to recognize and extract text in scanned documents and images - Copyright (c) 1998-2023 Apryse Group NV + Copyright (c) 1998-2024 Apryse Group NV en-US OCR PDF ligatures text glyphs iText Optical Character Recognition PDF/A ISO-compliant Tesseract open-source opensource English Mandarin Chinese Hindi Spanish French Arabic Bengali Russian Portuguese Indonesian scan image extractable data searchable diacritic sdk c# .net - + diff --git a/port-hash b/port-hash index 020660f..9e2b362 100644 --- a/port-hash +++ b/port-hash @@ -1 +1 @@ -df7d64395362a4d8c3223f21563291963638d049 +151cd11a394f65e278ed1cafd7d8a24a461940ad