DocumentBySentenceSplitter
langchain4j
<!-- Uses the langchain4j-easy-rag library -->
<dependency>
    <groupId>dev.langchain4j</groupId>
    <artifactId>langchain4j-easy-rag</artifactId>
    <version>1.0.0-beta1</version>
</dependency>
// token 计算器 Tokenizer tokenizer = new HuggingFaceTokenizer(); // 创建分割器实例(最大段落大小100词符,无重叠) DocumentBySentenceSplitter splitter = new DocumentBySentenceSplitter(100, 0, tokenizer); // 准备文档 String text = """ 这里是英文文档 """; Document document = Document.from(text); // 执行分割 List<TextSegment> segments = splitter.split(document);