Refactor deletion logic in DeletionService to improve batch processing and error handling for vector store deletions

This commit is contained in:
2025-07-07 12:16:16 +02:00
parent 1868367eaf
commit c4084ad011

View File

@@ -7,6 +7,8 @@ import java.util.concurrent.CompletableFuture;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.messaging.simp.SimpMessagingTemplate;
@@ -17,10 +19,8 @@ import com.olympus.apollo.exception.vectorStoreMetaDetailsEmptyException;
import com.olympus.apollo.repository.KSDocumentRepository;
import com.olympus.apollo.repository.KSGitInfoRepository;
import com.olympus.apollo.repository.KSGitIngestionInfoRepository;
import com.olympus.apollo.repository.KSIngestionInfoRepository;
import com.olympus.apollo.repository.KSTextsRepository;
import com.olympus.apollo.repository.KSVideoRepository;
import com.olympus.apollo.repository.VectorStoreRepository;
import com.olympus.dto.DeleteGitRepoDetailsRequest;
import com.olympus.dto.DeletionRequest;
import com.olympus.dto.ResultDTO;
@@ -42,18 +42,12 @@ public class DeletionService {
// Collaborators injected by Spring through @Autowired field injection.
// NOTE(review): this listing looks like a diff hunk with its +/- markers stripped, so some of
// these fields may be lines the commit removes — confirm against the actual file.
@Autowired
private KSTextsRepository ksTextsRepository;
@Autowired
private KSIngestionInfoRepository ksIngestionInfoRepository;
@Autowired
private KSGitIngestionInfoRepository ksGitIngestionInfoRepository;
@Autowired
private KSGitInfoRepository ksGitInfoRepository;
@Autowired
private VectorStoreRepository vectorStoreRepository;
@Autowired
private SimpMessagingTemplate simpMessagingTemplate;
@@ -79,26 +73,45 @@ public class DeletionService {
/**
 * Removes the vector-store entries that match one KSDocument id while keeping the KSDocument
 * record itself, updating the record's ingestion status around the operation.
 *
 * NOTE(review): this listing appears to be a rendered diff with the +/- markers stripped, so
 * superseded and replacement statements sit next to each other (two declarations of
 * ksDocument and of rag_filter, two logger.error calls, two setIngestionDate calls). Confirm
 * against the real file which of each pair is the surviving line.
 *
 * NOTE(review): the method is @Async and returns void, so the RuntimeException rethrown from
 * the catch block presumably never reaches the caller — confirm how the executor reports it.
 *
 * @param deletionRequest request whose getKsDocumentId() selects the KSDocument and is
 *                        embedded in the vector-store filter expression
 * @throws RuntimeException wrapping any exception raised inside the try block
 */
@Async("asyncTaskExecutor")
public void deleteRecordsOnlyFromVectorStore(DeletionRequest deletionRequest) {
try {
// Load the document. The .get() and .orElseThrow() lines look like the before/after
// versions of the same statement.
KSDocument ksDocument = ksDocumentRepository.findById(deletionRequest.getKsDocumentId()).get();
KSDocument ksDocument = ksDocumentRepository.findById(deletionRequest.getKsDocumentId()).orElseThrow();
// Persist the "DELETING" status before the vector store is touched.
ksDocument.setIngestionStatus("DELETING");
ksDocumentRepository.save(ksDocument);
// Variant that hands the filter string straight to vectorStore.delete in a single call.
String rag_filter = "KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'";
logger.info("Starting deletion");
vectorStore.delete(rag_filter);
// Delete from the vectorStore but keep the record
// Variant that first collects the ids of every matching document: blank query, same filter,
// topK set to Integer.MAX_VALUE so the result list is not capped by the request.
// NOTE(review): some vector-store backends may reject or struggle with a limit this large —
// confirm against the store in use.
String rag_filter = "KsDocumentId=='" + deletionRequest.getKsDocumentId() + "'";
SearchRequest searchRequest = SearchRequest.builder()
.query(" ")
.filterExpression(rag_filter)
.topK(Integer.MAX_VALUE)
.build();
List<String> idsToDelete = vectorStore.similaritySearch(searchRequest)
.stream()
.map(Document::getId)
.toList();
logger.info("Found {} documents to delete for KsDocumentId: {}", idsToDelete.size(), deletionRequest.getKsDocumentId());
// Batch so that the documents are deleted over multiple requests
final int DELETE_BATCH_SIZE = 500;
// Walk the id list in windows of at most DELETE_BATCH_SIZE; subList returns a view, so no
// copy is made per batch. An empty id list skips the loop entirely.
for (int i = 0; i < idsToDelete.size(); i += DELETE_BATCH_SIZE) {
int end = Math.min(i + DELETE_BATCH_SIZE, idsToDelete.size());
List<String> batch = idsToDelete.subList(i, end);
logger.info("Deleting batch from {} to {}", i, end);
vectorStore.delete(batch);
}
// Deletion finished: set the status to "LOADED" and stamp the ingestion date with the
// current time, then persist.
ksDocument.setIngestionStatus("LOADED");
Date now = new Date();
ksDocument.setIngestionDate(now);
ksDocument.setIngestionDate(new Date());
ksDocumentRepository.save(ksDocument);
logger.info("KSDocument with id {} deleted from VectorStore successfully.", deletionRequest.getKsDocumentId());
} catch (Exception e) {
// NOTE(review): the first logger.error call passes a concatenated String with no {}
// placeholder in the message, so that argument is presumably dropped from the output; the
// second call passes the Throwable itself.
// NOTE(review): nothing here resets the ingestion status, so a failure leaves the document
// saved as "DELETING" — confirm this is intended.
logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName());
logger.error("An error occurred while deleting records: ", e);
throw new RuntimeException("An error occurred while deleting records", e);
}
}