Added a batch-size limit when embedding documents

This commit is contained in:
DIR\maria.del.valle
2024-11-11 17:29:06 +01:00
parent 2ac8bc1094
commit 3c93b9e686

View File

@@ -15,6 +15,7 @@ import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.SearchRequest; import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore; import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource; import org.springframework.core.io.Resource;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@@ -38,6 +39,12 @@ public class KSIngestor {
@Autowired @Autowired
private VectorStore vectorStore; private VectorStore vectorStore;
@Value("${ksingestor.embedded.doc.batch.size:20}")
private int embDocsBatchSize;
@Value("${ksingestor.embedded.doc.retry.time:20000}")
private int embDocRetryTime;
Logger logger = LoggerFactory.getLogger(KSIngestor.class); Logger logger = LoggerFactory.getLogger(KSIngestor.class);
@@ -209,14 +216,22 @@ public class KSIngestor {
logger.info("Embedding documents"); logger.info("Embedding documents");
docs.forEach(doc -> logger.info("Document metadata: " + doc.getMetadata())); docs.forEach(doc -> logger.info("Document metadata: " + doc.getMetadata()));
int batchSize = embDocsBatchSize;
for (int i = 0; i < docs.size(); i += batchSize) {
int end = Math.min(i + batchSize, docs.size());
List<Document> currentList = docs.subList(i, end);
try { try {
vectorStore.add(docs); Thread.sleep(embDocRetryTime);
logger.info("Documents embedded"); vectorStore.add(currentList);
logger.info("Documents embedded - Progress: Batch from {} to {} completed", i, end);
} catch (Exception e) { } catch (Exception e) {
logger.error("Error embedding documents: ", e); logger.error("Error embedding documents from {} to {}: {}", i, end, e.getMessage());
} }
} }
}
public List<String> testSimilaritySearch(String query,String filterQuery) { public List<String> testSimilaritySearch(String query,String filterQuery) {
SearchRequest searchRequest = SearchRequest.defaults().withQuery(query).withTopK(5).withSimilarityThreshold(0.1); SearchRequest searchRequest = SearchRequest.defaults().withQuery(query).withTopK(5).withSimilarityThreshold(0.1);