From df0471d2696fa9336ec9c3486e5794d4cdabeb05 Mon Sep 17 00:00:00 2001 From: Christian Tzolov Date: Tue, 15 Oct 2024 15:49:12 +0200 Subject: [PATCH] Add Spring AI document readers to initializr - Include Markdown, Tika, and PDF document readers - Add descriptions and reference links for each reader - Set Spring AI BOM and disable starters for new entries See gh-1619 --- start-site/src/main/resources/application.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/start-site/src/main/resources/application.yml b/start-site/src/main/resources/application.yml index 8438d4499a..98952e2c72 100644 --- a/start-site/src/main/resources/application.yml +++ b/start-site/src/main/resources/application.yml @@ -1678,6 +1678,36 @@ initializr: links: - rel: reference href: https://docs.spring.io/spring-ai/reference/api/vectordbs/weaviate.html + - name: Markdown Document Reader + id: spring-ai-markdown-document-reader + group-id: org.springframework.ai + artifact-id: spring-ai-markdown-document-reader + description: Spring AI Markdown document reader. It loads Markdown documents and converts them into a list of Spring AI Document objects. + bom: spring-ai + starter: false + links: + - rel: reference + href: https://docs.spring.io/spring-ai/reference/api/etl-pipeline.html#_markdown + - name: Tika Document Reader + id: spring-ai-tika-document-reader + group-id: org.springframework.ai + artifact-id: spring-ai-tika-document-reader + description: Spring AI Tika document reader. It uses Apache Tika to extract text from a variety of document formats, such as PDF, DOC/DOCX, PPT/PPTX, and HTML, and converts them into a list of Spring AI Document objects.
+ bom: spring-ai + starter: false + links: + - rel: reference + href: https://docs.spring.io/spring-ai/reference/api/etl-pipeline.html#_tika_docx_pptx_html + - name: PDF Document Readers + id: spring-ai-pdf-document-reader + group-id: org.springframework.ai + artifact-id: spring-ai-pdf-document-reader + description: Spring AI PDF document readers. They use Apache PdfBox to extract text from PDF documents and convert them into a list of Spring AI Document objects. + bom: spring-ai + starter: false + links: + - rel: reference + href: https://docs.spring.io/spring-ai/reference/api/etl-pipeline.html#_pdf_page - name: Timefold Solver id: timefold-solver compatibilityRange: "[3.2.0,3.4.0-M1)"