From df0471d2696fa9336ec9c3486e5794d4cdabeb05 Mon Sep 17 00:00:00 2001
From: Christian Tzolov <ctzolov@vmware.com>
Date: Tue, 15 Oct 2024 15:49:12 +0200
Subject: [PATCH] Add Spring AI document readers to initializr

 - Include Markdown, Tika, and PDF document readers
 - Add descriptions and reference links for each reader
 - Set Spring AI BOM and disable starters for new entries

See gh-1619
---
 start-site/src/main/resources/application.yml | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/start-site/src/main/resources/application.yml b/start-site/src/main/resources/application.yml
index 8438d4499a..98952e2c72 100644
--- a/start-site/src/main/resources/application.yml
+++ b/start-site/src/main/resources/application.yml
@@ -1678,6 +1678,36 @@ initializr:
           links:
             - rel: reference
               href: https://docs.spring.io/spring-ai/reference/api/vectordbs/weaviate.html
+        - name: Markdown Document Reader
+          id: spring-ai-markdown-document-reader
+          group-id: org.springframework.ai
+          artifact-id: spring-ai-markdown-document-reader
+          description: Spring AI Markdown document reader. It loads Markdown documents and converts them into a list of Spring AI Document objects.
+          bom: spring-ai
+          starter: false
+          links:
+            - rel: reference
+              href: https://docs.spring.io/spring-ai/reference/api/etl-pipeline.html#_markdown
+        - name: Tika Document Reader
+          id: spring-ai-tika-document-reader
+          group-id: org.springframework.ai
+          artifact-id: spring-ai-tika-document-reader
+          description: Spring AI Tika document reader. It uses Apache Tika to extract text from a variety of document formats, such as PDF, DOC/DOCX, PPT/PPTX, and HTML, and converts them into a list of Spring AI Document objects.
+          bom: spring-ai
+          starter: false
+          links:
+            - rel: reference
+              href: https://docs.spring.io/spring-ai/reference/api/etl-pipeline.html#_tika_docx_pptx_html
+        - name: PDF Document Readers
+          id: spring-ai-pdf-document-reader
+          group-id: org.springframework.ai
+          artifact-id: spring-ai-pdf-document-reader
+          description: Spring AI PDF document readers. They use Apache PDFBox to extract text from PDF documents and convert them into a list of Spring AI Document objects.
+          bom: spring-ai
+          starter: false
+          links:
+            - rel: reference
+              href: https://docs.spring.io/spring-ai/reference/api/etl-pipeline.html#_pdf_page
         - name: Timefold Solver
           id: timefold-solver
           compatibilityRange: "[3.2.0,3.4.0-M1)"