diff --git a/src/main/java/com/olympus/apollo/services/KSIngestor.java b/src/main/java/com/olympus/apollo/services/KSIngestor.java index 28e92ce..5463a7b 100644 --- a/src/main/java/com/olympus/apollo/services/KSIngestor.java +++ b/src/main/java/com/olympus/apollo/services/KSIngestor.java @@ -15,6 +15,7 @@ import org.springframework.ai.transformer.splitter.TokenTextSplitter; import org.springframework.ai.vectorstore.SearchRequest; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.core.io.Resource; import org.springframework.stereotype.Service; @@ -37,7 +38,12 @@ public class KSIngestor { @Autowired private VectorStore vectorStore; + + @Value("${ksingestor.embedded.doc.batch.size:20}") + private int embDocsBatchSize; + @Value("${ksingestor.embedded.doc.retry.time:20000}") + private int embDocRetryTime; Logger logger = LoggerFactory.getLogger(KSIngestor.class); @@ -209,12 +215,20 @@ public class KSIngestor { logger.info("Embedding documents"); docs.forEach(doc -> logger.info("Document metadata: " + doc.getMetadata())); - try { - vectorStore.add(docs); - logger.info("Documents embedded"); - } catch (Exception e) { - logger.error("Error embedding documents: ", e); + + int batchSize = embDocsBatchSize; + for (int i = 0; i < docs.size(); i += batchSize) { + int end = Math.min(i + batchSize, docs.size()); + List currentList = docs.subList(i, end); + try { + Thread.sleep(embDocRetryTime); + vectorStore.add(currentList); + logger.info("Documents embedded - Progress: Batch from {} to {} completed", i, end); + } catch (Exception e) { + logger.error("Error embedding documents from {} to {}: {}", i, end, e.getMessage()); + } } + } public List testSimilaritySearch(String query,String filterQuery) {