diff --git a/pom.xml b/pom.xml index 5e060a7..a90a3e3 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ 21 - 1.0.0-M2 + 1.0.0-M6 2023.0.3 diff --git a/src/main/java/com/olympus/apollo/config/ApolloConfig.java b/src/main/java/com/olympus/apollo/config/ApolloConfig.java index 516bf18..159706f 100644 --- a/src/main/java/com/olympus/apollo/config/ApolloConfig.java +++ b/src/main/java/com/olympus/apollo/config/ApolloConfig.java @@ -3,7 +3,6 @@ package com.olympus.apollo.config; import org.springframework.ai.azure.openai.AzureOpenAiEmbeddingModel; import org.springframework.ai.embedding.EmbeddingModel; import org.springframework.ai.openai.OpenAiEmbeddingModel; -import org.springframework.ai.vectorstore.ChromaVectorStore; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.ai.vectorstore.azure.AzureVectorStore; import org.springframework.context.annotation.Bean; diff --git a/src/main/java/com/olympus/apollo/config/VectorStoreConfig.java b/src/main/java/com/olympus/apollo/config/VectorStoreConfig.java index 1a64a32..f6907fd 100644 --- a/src/main/java/com/olympus/apollo/config/VectorStoreConfig.java +++ b/src/main/java/com/olympus/apollo/config/VectorStoreConfig.java @@ -15,7 +15,7 @@ import com.azure.core.credential.AzureKeyCredential; import com.azure.search.documents.indexes.SearchIndexClient; import com.azure.search.documents.indexes.SearchIndexClientBuilder; -//@Configuration +@Configuration public class VectorStoreConfig { @@ -44,7 +44,8 @@ public class VectorStoreConfig { fields.add(AzureVectorStore.MetadataField.text("KsFileSource")); fields.add(AzureVectorStore.MetadataField.text("KsDocumentId")); - return new AzureVectorStore(searchIndexClient, embeddingModel,initSchema, fields); + //return new AzureVectorStore(searchIndexClient, embeddingModel,initSchema, fields); + return null; } } diff --git a/src/main/java/com/olympus/apollo/controllers/FeApi/VectorStoreController.java b/src/main/java/com/olympus/apollo/controllers/FeApi/VectorStoreController.java index a95ac71..bdf8b40 100644 --- a/src/main/java/com/olympus/apollo/controllers/FeApi/VectorStoreController.java +++ b/src/main/java/com/olympus/apollo/controllers/FeApi/VectorStoreController.java @@ -50,6 +50,13 @@ public class VectorStoreController { return ResponseEntity.ok("Request In Working"); } + @PostMapping("/deleteRecordsFromVectorStore") + public ResponseEntity deleteRecordsOnlyFromVectorStore(@RequestBody DeletionRequest deletionRequest){ + deletionService.deleteRecordsOnlyFromVectorStore(deletionRequest); + return ResponseEntity.ok("Request In Working"); + } + + @PostMapping("/deleteGitRecords") public ResponseEntity deleteGitRecords(@RequestBody DeleteGitRepoDetailsRequest deleteGitRepoDetailsRequest){ deletionService.deleteRecordsOfGitRepo(deleteGitRepoDetailsRequest); diff --git a/src/main/java/com/olympus/apollo/controllers/KSFileController.java b/src/main/java/com/olympus/apollo/controllers/KSFileController.java index f677088..4e8fd5f 100644 --- a/src/main/java/com/olympus/apollo/controllers/KSFileController.java +++ b/src/main/java/com/olympus/apollo/controllers/KSFileController.java @@ -42,7 +42,7 @@ public class KSFileController { @PostMapping("/upload") - public String handleFileUpload( + public ResponseEntity handleFileUpload( @RequestParam("file") MultipartFile file, @ModelAttribute FileUploadDTO fileUploadDTO ) { @@ -53,7 +53,7 @@ public class KSFileController { ksDocument.setFileName(file.getOriginalFilename()); ksDocument.setName(file.getOriginalFilename()); ksDocument.setDescription(fileUploadDTO.getDescription()); - ksDocument.setIngestionStatus("NEW"); + ksDocument.setIngestionStatus("LOADED"); ksDocument.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date())); Date now = new Date(); @@ -79,8 +79,10 @@ public class KSFileController { ksDocument.setIngestionInfo(ksIngestionInfo); ksDocumentREpository.save(ksDocument); - return "OK"; + // return "OK"; + return ResponseEntity.ok(ksDocument); } + @ExceptionHandler(StorageFileNotFoundException.class) public ResponseEntity handleStorageFileNotFound(StorageFileNotFoundException exc) { @@ -98,7 +100,7 @@ public class KSFileController { if(ksTextsInfoOpt.isEmpty()){ ksTexts.setName(externalFileIngestionDTO.getName()); ksTexts.setDescription(externalFileIngestionDTO.getDescription()); - //ksTexts.setIngestionStatus("NEW"); + //ksTexts.setIngestionStatus("LOADED"); ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date())); Date now = new Date(); diff --git a/src/main/java/com/olympus/apollo/controllers/SearchDocController.java b/src/main/java/com/olympus/apollo/controllers/SearchDocController.java index 49392bf..a724397 100644 --- a/src/main/java/com/olympus/apollo/controllers/SearchDocController.java +++ b/src/main/java/com/olympus/apollo/controllers/SearchDocController.java @@ -6,6 +6,7 @@ import java.util.List; import org.slf4j.LoggerFactory; import org.springframework.ai.document.Document; import org.springframework.ai.vectorstore.SearchRequest; +import org.springframework.ai.vectorstore.SearchRequest.Builder; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.PostMapping; @@ -30,22 +31,37 @@ public class SearchDocController { @PostMapping("/doc_search") public List vectorSearch(@RequestBody VectorSearchRequest vectorSearchRequest) { - SearchRequest request = SearchRequest.defaults() - .withQuery(vectorSearchRequest.getQuery()) - .withTopK(vectorSearchRequest.getTopK()) - .withSimilarityThreshold(vectorSearchRequest.getThreshold()); + // SearchRequest request = SearchRequest.defaults() + // .withQuery(vectorSearchRequest.getQuery()) + // .withTopK(vectorSearchRequest.getTopK()) + // .withSimilarityThreshold(vectorSearchRequest.getThreshold()); + + // List docs = this.vectorStore.similaritySearch(request); + // logger.info("Number of VDB retrieved documents: " + docs.size()); + + // List results = new ArrayList<>(); + // for(Document doc : docs){ + // results.add(doc.getContent()); + // } + + Builder request_builder = SearchRequest.builder() + .query(vectorSearchRequest.getQuery()) + .topK(vectorSearchRequest.getTopK()) + .similarityThreshold(vectorSearchRequest.getThreshold()); if(vectorSearchRequest.getFilterQuery() != null && !vectorSearchRequest.getFilterQuery().isEmpty()){ - request.withFilterExpression(vectorSearchRequest.getFilterQuery()); + request_builder.filterExpression(vectorSearchRequest.getFilterQuery()); logger.info("Using Filter expression: " + vectorSearchRequest.getFilterQuery()); - } + } + SearchRequest request = request_builder.build(); List docs = this.vectorStore.similaritySearch(request); + logger.info("Number of VDB retrieved documents: " + docs.size()); - List results = new ArrayList<>(); - for(Document doc : docs){ - results.add(doc.getContent()); + List results = new ArrayList(); + for (Document doc : docs) { + results.add(doc.getText()); } return results; diff --git a/src/main/java/com/olympus/apollo/controllers/TestController.java b/src/main/java/com/olympus/apollo/controllers/TestController.java index de83cc9..2ca3835 100644 --- a/src/main/java/com/olympus/apollo/controllers/TestController.java +++ b/src/main/java/com/olympus/apollo/controllers/TestController.java @@ -19,6 +19,7 @@ import org.springframework.web.bind.annotation.*; import com.olympus.dto.IngestionOutput; import com.olympus.apollo.services.GitRepositoryIngestor; import com.olympus.apollo.services.KSIngestor; +import org.springframework.ai.document.Document; @RestController @@ -55,7 +56,7 @@ public class TestController { } @GetMapping("test/query_vector") - public List testSimilaritySearch(@RequestParam String query, @RequestParam String filterQuery) { + public List testSimilaritySearch(@RequestParam String query, @RequestParam String filterQuery) { return ksIngestor.testSimilaritySearch(query, filterQuery); } diff --git a/src/main/java/com/olympus/apollo/services/DeletionService.java b/src/main/java/com/olympus/apollo/services/DeletionService.java index a3522c4..1229437 100644 --- a/src/main/java/com/olympus/apollo/services/DeletionService.java +++ b/src/main/java/com/olympus/apollo/services/DeletionService.java @@ -7,6 +7,7 @@ import com.olympus.apollo.repository.*; import com.olympus.model.apollo.KSGitInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Date; import com.olympus.dto.DeletionRequest; import org.springframework.ai.document.Document; @@ -16,6 +17,8 @@ import org.springframework.messaging.simp.SimpMessagingTemplate; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; import org.springframework.ai.vectorstore.VectorStore; +import com.olympus.model.apollo.KSDocument; + import java.util.ArrayList; import java.util.List; @@ -57,25 +60,68 @@ public class DeletionService { //TODO: COMPLETE REFACTOR REQUIRED TO DELETE RECORD FROM AZURE SEARCH // NOT WORKING AT THE MOMENT - boolean KSDocumentExists = deletionRequest.getKsDocumentId() != null && - !deletionRequest.getKsDocumentId().isEmpty() && - ksDocumentRepository.existsById(deletionRequest.getKsDocumentId()); - if(KSDocumentExists){ - SearchRequest searchRequest = SearchRequest.defaults() - .withQuery("a").withTopK(1000) - .withSimilarityThreshold(0.0) - .withFilterExpression("KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'"); + // boolean KSDocumentExists = deletionRequest.getKsDocumentId() != null && + // !deletionRequest.getKsDocumentId().isEmpty() && + // ksDocumentRepository.existsById(deletionRequest.getKsDocumentId()); + // if(KSDocumentExists){ + // SearchRequest searchRequest = SearchRequest.defaults() + // .withQuery("a").withTopK(1000) + // .withSimilarityThreshold(0.0) + // .withFilterExpression("KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'"); - List docs = vectorStore.similaritySearch(searchRequest); - List ids = docs.stream().map(Document::getId).toList(); - vectorStore.delete(ids); + // List docs = vectorStore.similaritySearch(searchRequest); + // List ids = docs.stream().map(Document::getId).toList(); + // vectorStore.delete(ids); + String rag_filter = "KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'"; + logger.info("Starting deletion"); + vectorStore.delete(rag_filter); - ksDocumentRepository.deleteById(deletionRequest.getKsDocumentId()); - logger.info("KSDocument with id {} deleted successfully.", deletionRequest.getKsDocumentId()); - }else{ - logger.warn("KSDocument with id {} does not exist.", deletionRequest.getKsDocumentId()); - } + ksDocumentRepository.deleteById(deletionRequest.getKsDocumentId()); + logger.info("KSDocument with id {} deleted successfully.", deletionRequest.getKsDocumentId()); + // }else{ + // logger.warn("KSDocument with id {} does not exist.", deletionRequest.getKsDocumentId()); + // } + } catch (Exception e) { + logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName()); + throw new RuntimeException("An error occurred while deleting records", e); + } + } + + @Async("asyncTaskExecutor") + public void deleteRecordsOnlyFromVectorStore(DeletionRequest deletionRequest) { + try { + + //TODO: COMPLETE REFACTOR REQUIRED TO DELETE RECORD FROM AZURE SEARCH + // NOT WORKING AT THE MOMENT + // boolean KSDocumentExists = deletionRequest.getKsDocumentId() != null && + // !deletionRequest.getKsDocumentId().isEmpty() && + // ksDocumentRepository.existsById(deletionRequest.getKsDocumentId()); + // if(KSDocumentExists){ + // SearchRequest searchRequest = SearchRequest.defaults() + // .withQuery("a").withTopK(1000) + // .withSimilarityThreshold(0.0) + // .withFilterExpression("KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'"); + + + // List docs = vectorStore.similaritySearch(searchRequest); + // List ids = docs.stream().map(Document::getId).toList(); + // vectorStore.delete(ids); + String rag_filter = "KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'"; + logger.info("Starting deletion"); + vectorStore.delete(rag_filter); + + //elimino dal vectorStore ma mantengo il record + KSDocument ksDocument = ksDocumentRepository.findById(deletionRequest.getKsDocumentId()).get(); + ksDocument.setIngestionStatus("LOADED"); + Date now = new Date(); + ksDocument.setIngestionDate(now); + + ksDocumentRepository.save(ksDocument); + logger.info("KSDocument with id {} deleted from VectorStore successfully.", deletionRequest.getKsDocumentId()); + // }else{ + // logger.warn("KSDocument with id {} does not exist.", deletionRequest.getKsDocumentId()); + // } } catch (Exception e) { logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName()); throw new RuntimeException("An error occurred while deleting records", e); diff --git a/src/main/java/com/olympus/apollo/services/KSIngestor.java b/src/main/java/com/olympus/apollo/services/KSIngestor.java index fa82193..20b866f 100644 --- a/src/main/java/com/olympus/apollo/services/KSIngestor.java +++ b/src/main/java/com/olympus/apollo/services/KSIngestor.java @@ -10,6 +10,7 @@ import com.olympus.apollo.repository.KSTextsRepository; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.ai.document.Document; +import org.springframework.ai.vectorstore.SearchRequest.Builder; import org.springframework.ai.reader.tika.TikaDocumentReader; import org.springframework.ai.transformer.splitter.TokenTextSplitter; import org.springframework.ai.vectorstore.SearchRequest; @@ -49,9 +50,12 @@ public class KSIngestor { Logger logger = LoggerFactory.getLogger(KSIngestor.class); public void deleteAll(String document_file_name) { - List docToDelete = vectorStore.similaritySearch(SearchRequest.defaults().withQuery("*") - .withSimilarityThreshold(0.0) - .withFilterExpression("'source'=='3-automated-test-framework---atf.md'")); + Builder request_builder = SearchRequest.builder() + .query("*") + .similarityThreshold(0.0) + .filterExpression("'source'=='" + document_file_name + "'"); + SearchRequest request = request_builder.build(); + List docToDelete = vectorStore.similaritySearch(request); logger.info("Number of documents to delete: " + docToDelete.size()); } @@ -60,7 +64,7 @@ public class KSIngestor { IngestionOutput ingestionLoopOutput = new IngestionOutput(); try { - ksDocumentRepository.findAllByIngestionStatus("NEW").forEach(ksDocument -> { + ksDocumentRepository.findAllByIngestionStatus("LOADED").forEach(ksDocument -> { ingestDocument(ksDocument); ingestionLoopOutput.getIngestedDocumentId().add(ksDocument.getId()); }); @@ -77,7 +81,7 @@ public class KSIngestor { Optional optionalDocument = ksDocumentRepository.findById(id); if (optionalDocument.isPresent()) { KSDocument ksDocument = optionalDocument.get(); - if ("NEW".equals(ksDocument.getIngestionStatus())) { + if ("LOADED".equals(ksDocument.getIngestionStatus())) { return ingestDocument(ksDocument); } else { ingestionOutput.setMessage("OOPS: Document is already Injected"); @@ -232,22 +236,22 @@ public class KSIngestor { } - public List testSimilaritySearch(String query,String filterQuery) { - - SearchRequest searchRequest = SearchRequest.defaults().withQuery(query).withTopK(5).withSimilarityThreshold(0.1); + public List testSimilaritySearch(String query,String filterQuery) { + Builder request_builder = SearchRequest.builder() + .query(query) + .topK(5) + .similarityThreshold(0.1); - if(filterQuery!=null && !filterQuery.isEmpty()){ - searchRequest.withFilterExpression(filterQuery); - } + if(filterQuery != null && !filterQuery.isEmpty()){ + request_builder.filterExpression(filterQuery); + logger.info("Using Filter expression: " + filterQuery); + } - List docs = vectorStore.similaritySearch(searchRequest); + SearchRequest request = request_builder.build(); + List docs = vectorStore.similaritySearch(request); + + logger.info("Number of VDB retrieved documents: " + docs.size()); - List result = new ArrayList(); - for (Document doc : docs) { - result.add(doc.getContent()); - } - return result; + return docs; } - - -} \ No newline at end of file +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 2cdf55f..32b80c4 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -29,7 +29,7 @@ spring: client: host: "http://108.142.74.161" port: "8000" - key-token: "nVYLh3eq92aJP4x08dNdWngilPG2ooj9" + key-token: "tKAJfN1Yv5lP7pKorJHGfHMQhNEcM9uu" initialize-schema: "true" collection-name: "olympus" data: