diff --git a/src/main/java/com/olympus/apollo/services/DeletionService.java b/src/main/java/com/olympus/apollo/services/DeletionService.java index 070fad4..77fcbad 100644 --- a/src/main/java/com/olympus/apollo/services/DeletionService.java +++ b/src/main/java/com/olympus/apollo/services/DeletionService.java @@ -7,6 +7,8 @@ import java.util.concurrent.CompletableFuture; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.ai.document.Document; +import org.springframework.ai.vectorstore.SearchRequest; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.messaging.simp.SimpMessagingTemplate; @@ -17,10 +19,8 @@ import com.olympus.apollo.exception.vectorStoreMetaDetailsEmptyException; import com.olympus.apollo.repository.KSDocumentRepository; import com.olympus.apollo.repository.KSGitInfoRepository; import com.olympus.apollo.repository.KSGitIngestionInfoRepository; -import com.olympus.apollo.repository.KSIngestionInfoRepository; import com.olympus.apollo.repository.KSTextsRepository; import com.olympus.apollo.repository.KSVideoRepository; -import com.olympus.apollo.repository.VectorStoreRepository; import com.olympus.dto.DeleteGitRepoDetailsRequest; import com.olympus.dto.DeletionRequest; import com.olympus.dto.ResultDTO; @@ -42,18 +42,12 @@ public class DeletionService { @Autowired private KSTextsRepository ksTextsRepository; - @Autowired - private KSIngestionInfoRepository ksIngestionInfoRepository; - @Autowired private KSGitIngestionInfoRepository ksGitIngestionInfoRepository; @Autowired private KSGitInfoRepository ksGitInfoRepository; - @Autowired - private VectorStoreRepository vectorStoreRepository; - @Autowired private SimpMessagingTemplate simpMessagingTemplate; @@ -79,26 +73,45 @@ public class DeletionService { @Async("asyncTaskExecutor") public void deleteRecordsOnlyFromVectorStore(DeletionRequest deletionRequest) { try { - - KSDocument ksDocument = ksDocumentRepository.findById(deletionRequest.getKsDocumentId()).get(); + KSDocument ksDocument = ksDocumentRepository.findById(deletionRequest.getKsDocumentId()).orElseThrow(); ksDocument.setIngestionStatus("DELETING"); ksDocumentRepository.save(ksDocument); - String rag_filter = "KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'"; - logger.info("Starting deletion"); - vectorStore.delete(rag_filter); - //elimino dal vectorStore ma mantengo il record + String rag_filter = "KsDocumentId=='" + deletionRequest.getKsDocumentId() + "'"; + + + SearchRequest searchRequest = SearchRequest.builder() + .query(" ") + .filterExpression(rag_filter) + .topK(Integer.MAX_VALUE) + .build(); + + List idsToDelete = vectorStore.similaritySearch(searchRequest) + .stream() + .map(Document::getId) + .toList(); + + logger.info("Found {} documents to delete for KsDocumentId: {}", idsToDelete.size(), deletionRequest.getKsDocumentId()); + + //Batch per eliminare i file con più richieste + final int DELETE_BATCH_SIZE = 500; + for (int i = 0; i < idsToDelete.size(); i += DELETE_BATCH_SIZE) { + int end = Math.min(i + DELETE_BATCH_SIZE, idsToDelete.size()); + List batch = idsToDelete.subList(i, end); + logger.info("Deleting batch from {} to {}", i, end); + vectorStore.delete(batch); + } + ksDocument.setIngestionStatus("LOADED"); - Date now = new Date(); - ksDocument.setIngestionDate(now); - + ksDocument.setIngestionDate(new Date()); ksDocumentRepository.save(ksDocument); + logger.info("KSDocument with id {} deleted from VectorStore successfully.", deletionRequest.getKsDocumentId()); - + } catch (Exception e) { - logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName()); + logger.error("An error occurred while deleting records: ", e); throw new RuntimeException("An error occurred while deleting records", e); } }