Added a batch size limit for embedding documents
This commit is contained in:
@@ -15,6 +15,7 @@ import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
|||||||
import org.springframework.ai.vectorstore.SearchRequest;
|
import org.springframework.ai.vectorstore.SearchRequest;
|
||||||
import org.springframework.ai.vectorstore.VectorStore;
|
import org.springframework.ai.vectorstore.VectorStore;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.core.io.Resource;
|
import org.springframework.core.io.Resource;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
@@ -38,6 +39,11 @@ public class KSIngestor {
|
|||||||
@Autowired
|
@Autowired
|
||||||
private VectorStore vectorStore;
|
private VectorStore vectorStore;
|
||||||
|
|
||||||
|
@Value("${ksingestor.embedded.doc.batch.size:20}")
|
||||||
|
private int embDocsBatchSize;
|
||||||
|
|
||||||
|
@Value("${ksingestor.embedded.doc.retry.time:20000}")
|
||||||
|
private int embDocRetryTime;
|
||||||
|
|
||||||
Logger logger = LoggerFactory.getLogger(KSIngestor.class);
|
Logger logger = LoggerFactory.getLogger(KSIngestor.class);
|
||||||
|
|
||||||
@@ -209,14 +215,22 @@ public class KSIngestor {
|
|||||||
logger.info("Embedding documents");
|
logger.info("Embedding documents");
|
||||||
|
|
||||||
docs.forEach(doc -> logger.info("Document metadata: " + doc.getMetadata()));
|
docs.forEach(doc -> logger.info("Document metadata: " + doc.getMetadata()));
|
||||||
|
|
||||||
|
int batchSize = embDocsBatchSize;
|
||||||
|
for (int i = 0; i < docs.size(); i += batchSize) {
|
||||||
|
int end = Math.min(i + batchSize, docs.size());
|
||||||
|
List<Document> currentList = docs.subList(i, end);
|
||||||
try {
|
try {
|
||||||
vectorStore.add(docs);
|
Thread.sleep(embDocRetryTime);
|
||||||
logger.info("Documents embedded");
|
vectorStore.add(currentList);
|
||||||
|
logger.info("Documents embedded - Progress: Batch from {} to {} completed", i, end);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Error embedding documents: ", e);
|
logger.error("Error embedding documents from {} to {}: {}", i, end, e.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public List<String> testSimilaritySearch(String query,String filterQuery) {
|
public List<String> testSimilaritySearch(String query,String filterQuery) {
|
||||||
SearchRequest searchRequest = SearchRequest.defaults().withQuery(query).withTopK(5).withSimilarityThreshold(0.1);
|
SearchRequest searchRequest = SearchRequest.defaults().withQuery(query).withTopK(5).withSimilarityThreshold(0.1);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user