package com.hanserwei.chat.reader;

import org.springframework.ai.document.Document;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;

import java.util.List;

/**
 * {@link DocumentParser} for PowerPoint uploads ({@code .ppt} / {@code .pptx}).
 *
 * <p>The uploaded file is read through a {@link TikaDocumentReader} and the resulting
 * documents are then split into chunks with a {@link TokenTextSplitter}.
 */
@Component
public class MyTikaPptReader implements DocumentParser {

    // Splitter settings, passed positionally to the five-argument TokenTextSplitter
    // constructor. Names follow the Spring AI constructor parameters
    // (chunkSize, minChunkSizeChars, minChunkLengthToEmbed, maxNumChunks, keepSeparator).
    private static final int CHUNK_SIZE = 1000;
    private static final int MIN_CHUNK_SIZE_CHARS = 400;
    private static final int MIN_CHUNK_LENGTH_TO_EMBED = 10;
    private static final int MAX_NUM_CHUNKS = 5000;
    private static final boolean KEEP_SEPARATOR = true;

    /**
     * Reads the uploaded file with Tika and splits the extracted documents into chunks.
     *
     * @param file the uploaded file to parse
     * @return the chunked documents produced by the splitter
     */
    @Override
    public List<Document> parse(MultipartFile file) {
        // Create a Tika reader over the uploaded file.
        // NOTE(review): MultipartFileResource is declared elsewhere in the project;
        // presumably it adapts the MultipartFile to a Spring Resource - confirm.
        TikaDocumentReader tikaDocumentReader =
                new TikaDocumentReader(MultipartFileResource.of(file));

        // Read the file and convert it into a list of Document objects.
        List<Document> documents = tikaDocumentReader.get();

        // Chunk the documents using the custom splitter settings declared above.
        TokenTextSplitter splitter = new TokenTextSplitter(
                CHUNK_SIZE,
                MIN_CHUNK_SIZE_CHARS,
                MIN_CHUNK_LENGTH_TO_EMBED,
                MAX_NUM_CHUNKS,
                KEEP_SEPARATOR);
        return splitter.apply(documents);
    }

    /**
     * Reports whether this parser handles the given upload, based on either the file
     * extension ({@code ppt}, {@code pptx}) or the PowerPoint content types.
     *
     * @param filename    the name of the uploaded file
     * @param contentType the content type reported for the upload
     * @return {@code true} if either the extension check or the content-type check matches
     */
    @Override
    public boolean supports(String filename, String contentType) {
        // hasExtension / matchesContentType are not defined in this class;
        // presumably they are inherited from DocumentParser - verify there.
        return hasExtension(filename, "ppt", "pptx")
                || matchesContentType(contentType,
                        "application/vnd.ms-powerpoint",
                        "application/vnd.openxmlformats-officedocument.presentationml.presentation");
    }
}