Added a batch-size limit when embedding documents

This commit is contained in:
DIR\maria.del.valle
2024-11-11 17:29:06 +01:00
parent 2ac8bc1094
commit 3c93b9e686

View File

@@ -15,6 +15,7 @@ import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.stereotype.Service;
@@ -38,6 +39,12 @@ public class KSIngestor {
// Vector store that documents are added to; injected by Spring via @Autowired.
@Autowired
private VectorStore vectorStore;
// Maximum number of documents passed to vectorStore.add(...) in one batch.
// Read from property "ksingestor.embedded.doc.batch.size"; falls back to 20 when unset.
@Value("${ksingestor.embedded.doc.batch.size:20}")
private int embDocsBatchSize;
// Value handed to Thread.sleep(...) before each batch is added, i.e. a delay in milliseconds.
// Read from property "ksingestor.embedded.doc.retry.time"; falls back to 20000 when unset.
// NOTE(review): despite the "retry" name, the visible code sleeps before every batch,
// not only after a failure — confirm this is intended.
@Value("${ksingestor.embedded.doc.retry.time:20000}")
private int embDocRetryTime;
// Logger for this class (package-private instance field).
Logger logger = LoggerFactory.getLogger(KSIngestor.class);
@@ -209,12 +216,20 @@ public class KSIngestor {
logger.info("Embedding documents");
docs.forEach(doc -> logger.info("Document metadata: " + doc.getMetadata()));
try {
vectorStore.add(docs);
logger.info("Documents embedded");
} catch (Exception e) {
logger.error("Error embedding documents: ", e);
int batchSize = embDocsBatchSize;
for (int i = 0; i < docs.size(); i += batchSize) {
int end = Math.min(i + batchSize, docs.size());
List<Document> currentList = docs.subList(i, end);
try {
Thread.sleep(embDocRetryTime);
vectorStore.add(currentList);
logger.info("Documents embedded - Progress: Batch from {} to {} completed", i, end);
} catch (Exception e) {
logger.error("Error embedding documents from {} to {}: {}", i, end, e.getMessage());
}
}
}
public List<String> testSimilaritySearch(String query,String filterQuery) {