From 1c316767f4c7e82fab4057309dd1223be5b2ef35 Mon Sep 17 00:00:00 2001 From: sumedh Date: Fri, 4 Oct 2024 20:31:37 +0530 Subject: [PATCH] ingestion in one go --- .../apollo/controllers/KSFileController.java | 73 ++++++++++++------- .../apollo/dto/ExternalFileIngestionDTO.java | 1 + .../apollo/repository/KSTextsRepository.java | 8 +- .../repository/VectorStoreRepository.java | 3 + .../apollo/services/DeletionService.java | 38 ++++++++++ .../olympus/apollo/services/KSIngestor.java | 22 ++---- 6 files changed, 103 insertions(+), 42 deletions(-) diff --git a/src/main/java/com/olympus/apollo/controllers/KSFileController.java b/src/main/java/com/olympus/apollo/controllers/KSFileController.java index 8e0704d..c6f0475 100644 --- a/src/main/java/com/olympus/apollo/controllers/KSFileController.java +++ b/src/main/java/com/olympus/apollo/controllers/KSFileController.java @@ -4,12 +4,15 @@ import java.util.HashMap; import java.util.Date; import java.text.SimpleDateFormat; import java.util.List; +import java.util.Optional; +import com.olympus.apollo.dto.DeletionRequest; import com.olympus.apollo.dto.ExternalFileIngestionDTO; import com.olympus.apollo.dto.IngestionOutput; import com.olympus.apollo.models.KSGitInfo; import com.olympus.apollo.models.KSTexts; import com.olympus.apollo.repository.KSTextsRepository; +import com.olympus.apollo.services.DeletionService; import com.olympus.apollo.services.KSIngestor; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.ResponseEntity; @@ -24,6 +27,8 @@ import com.olympus.apollo.exception.StorageFileNotFoundException; import com.olympus.apollo.services.StorageService; import com.olympus.apollo.dto.FileUploadDTO; +import javax.swing.text.html.Option; + @RestController public class KSFileController { @Autowired @@ -37,6 +42,8 @@ public class KSFileController { private KSTextsRepository ksTextsRepository; @Autowired private KSIngestor ksIngestor; + @Autowired + private DeletionService deletionService; @PostMapping("/upload") @@ -89,36 +96,44 @@ public class KSFileController { public String handleExternalIngestion( @RequestBody ExternalFileIngestionDTO externalFileIngestionDTO ) { - KSTexts ksTexts = new KSTexts(); - ksTexts.setTextToEmbed(externalFileIngestionDTO.getTextToEmbed()); + //ksTexts.setTextToEmbed(externalFileIngestionDTO.getTextToEmbed()); ksTexts.setName(externalFileIngestionDTO.getName()); - ksTexts.setDescription(externalFileIngestionDTO.getDescription()); - ksTexts.setIngestionStatus("NEW"); - ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date())); + Optional ksTextsInfoOpt= ksTextsRepository.findByName(externalFileIngestionDTO.getName()); + if(ksTextsInfoOpt.isEmpty()){ + ksTexts.setName(externalFileIngestionDTO.getName()); + ksTexts.setDescription(externalFileIngestionDTO.getDescription()); + //ksTexts.setIngestionStatus("NEW"); + ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date())); - Date now = new Date(); - ksTexts.setIngestionDate(now); + Date now = new Date(); + ksTexts.setIngestionDate(now); + HashMap IngestionInfo = new HashMap<>(); + ksTexts.setType(externalFileIngestionDTO.getType()); + HashMap metadata = new HashMap<>(); + metadata.put("KsApplicationName", externalFileIngestionDTO.getKsApplicationName()); + metadata.put("KsDoctype", externalFileIngestionDTO.getKsDocType()); + metadata.put("KsDocSource", externalFileIngestionDTO.getKsDocSource()); + metadata.put("KsFileSource", externalFileIngestionDTO.getName()); + IngestionInfo.put("type",externalFileIngestionDTO.getType()); + IngestionInfo.put("metadata",metadata); + //ksTexts.setMetadata(metadata); + ksTexts.setIngestionInfo(IngestionInfo); + ksTexts.setDefaultChunkSize(externalFileIngestionDTO.getDefaultChunkSize()); + ksTexts.setMinChunkSize(externalFileIngestionDTO.getMinChunkSize()); + ksTexts.setMaxNumberOfChunks(externalFileIngestionDTO.getMaxNumberOfChunks()); + ksTexts.setMinChunkSizeToEmbed(externalFileIngestionDTO.getMinChunkSizeToEmbed()); + ksTexts.setAdditionalMetadata(externalFileIngestionDTO.getAdditionalMetaData()); - HashMap IngestionInfo = new HashMap<>(); - ksTexts.setType(externalFileIngestionDTO.getType()); - HashMap metadata = new HashMap<>(); - metadata.put("KsApplicationName", externalFileIngestionDTO.getKsApplicationName()); - metadata.put("KsDoctype", externalFileIngestionDTO.getKsDocType()); - metadata.put("KsDocSource", externalFileIngestionDTO.getKsDocSource()); - metadata.put("KsFileSource", externalFileIngestionDTO.getName()); - IngestionInfo.put("type",externalFileIngestionDTO.getType()); - IngestionInfo.put("metadata",metadata); - //ksTexts.setMetadata(metadata); - ksTexts.setIngestionInfo(IngestionInfo); - ksTexts.setDefaultChunkSize(externalFileIngestionDTO.getDefaultChunkSize()); - ksTexts.setMinChunkSize(externalFileIngestionDTO.getMinChunkSize()); - ksTexts.setMaxNumberOfChunks(externalFileIngestionDTO.getMaxNumberOfChunks()); - ksTexts.setMinChunkSizeToEmbed(externalFileIngestionDTO.getMinChunkSizeToEmbed()); - ksTexts.setAdditionalMetadata(externalFileIngestionDTO.getAdditionalMetaData()); + ksTextsRepository.save(ksTexts); + System.out.println(ksTexts.getId()); - ksTextsRepository.save(ksTexts); + ksIngestor.ingestTextById(ksTexts.getId(),externalFileIngestionDTO.getTextToEmbed(),externalFileIngestionDTO.getKsExternalDocUniqueId()); + }else { + KSTexts ksTexts1 = ksTextsInfoOpt.get(); + ksIngestor.ingestTextById(ksTexts1.getId(),externalFileIngestionDTO.getTextToEmbed(),externalFileIngestionDTO.getKsExternalDocUniqueId()); + } return "OK"; } @@ -129,8 +144,14 @@ public class KSFileController { return result; } - @GetMapping("/ingest_texts/{id}") + @DeleteMapping("/deleteTextRecords/{id}") + public ResponseEntity deleteTextRecords(@PathVariable String id){ + deletionService.deleteTextRecords(id); + return ResponseEntity.ok("Request In Working"); + } + + /*@GetMapping("/ingest_texts/{id}") public IngestionOutput ingestDocumentById(@PathVariable String id) { return ksIngestor.ingestTextById(id); - } + }*/ } \ No newline at end of file diff --git a/src/main/java/com/olympus/apollo/dto/ExternalFileIngestionDTO.java b/src/main/java/com/olympus/apollo/dto/ExternalFileIngestionDTO.java index d5e85dd..719ece5 100644 --- a/src/main/java/com/olympus/apollo/dto/ExternalFileIngestionDTO.java +++ b/src/main/java/com/olympus/apollo/dto/ExternalFileIngestionDTO.java @@ -20,4 +20,5 @@ public class ExternalFileIngestionDTO { private int maxNumberOfChunks; private int minChunkSizeToEmbed; private HashMap additionalMetaData; + private String ksExternalDocUniqueId; } diff --git a/src/main/java/com/olympus/apollo/repository/KSTextsRepository.java b/src/main/java/com/olympus/apollo/repository/KSTextsRepository.java index d36e52b..907675d 100644 --- a/src/main/java/com/olympus/apollo/repository/KSTextsRepository.java +++ b/src/main/java/com/olympus/apollo/repository/KSTextsRepository.java @@ -2,14 +2,20 @@ package com.olympus.apollo.repository; import com.olympus.apollo.models.KSDocument; +import com.olympus.apollo.models.KSGitInfo; import com.olympus.apollo.models.KSTexts; import org.springframework.data.mongodb.repository.MongoRepository; +import org.springframework.data.mongodb.repository.Query; import org.springframework.data.rest.core.annotation.RepositoryRestResource; import org.springframework.web.bind.annotation.CrossOrigin; +import java.util.Optional; + @RepositoryRestResource(collectionResourceRel = "ksinternal", path = "ksinternal") @CrossOrigin public interface KSTextsRepository extends MongoRepository { - public Iterable findAllByIngestionStatus(String status); + @Query("{'name': ?0}") + Optional findByName(String name); + } diff --git a/src/main/java/com/olympus/apollo/repository/VectorStoreRepository.java b/src/main/java/com/olympus/apollo/repository/VectorStoreRepository.java index fd3f009..47c34ee 100644 --- a/src/main/java/com/olympus/apollo/repository/VectorStoreRepository.java +++ b/src/main/java/com/olympus/apollo/repository/VectorStoreRepository.java @@ -32,6 +32,9 @@ public interface VectorStoreRepository extends MongoRepository findByFilePath(String filePath); + @Query("{'metadata.KsInternalMainEntityId': ?0}") + List findByKsInternalMainEntityId(String filePath); + @Query("{'metadata.KsApplicationName': ?0, 'metadata.KsBranch': ?1, 'metadata.filePath': ?2}") Optional findByKsapplicationNameKsBranchFilePath(String ksApplicationName,String ksBranch,String filePath); } diff --git a/src/main/java/com/olympus/apollo/services/DeletionService.java b/src/main/java/com/olympus/apollo/services/DeletionService.java index beaf3c4..a47e904 100644 --- a/src/main/java/com/olympus/apollo/services/DeletionService.java +++ b/src/main/java/com/olympus/apollo/services/DeletionService.java @@ -18,6 +18,8 @@ import org.springframework.messaging.simp.SimpMessagingTemplate; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; +import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.concurrent.CompletableFuture; @@ -30,6 +32,9 @@ public class DeletionService { @Autowired private KSDocumentRepository ksDocumentRepository; + @Autowired + private KSTextsRepository ksTextsRepository; + @Autowired private KSIngestionInfoRepository ksIngestionInfoRepository; @@ -225,4 +230,37 @@ public class DeletionService { } } } + + + + @Async("asyncTaskExecutor") + public void deleteTextRecords(String id) { + try { + boolean KSTextExists = ksTextsRepository.existsById(id); + + List vectorStoreMetadataDetails = vectorStoreRepository.findByKsInternalMainEntityId(id); + + if (KSTextExists && !vectorStoreMetadataDetails.isEmpty()) { + for (VectorStore store : vectorStoreMetadataDetails) { + vectorStoreRepository.deleteById(store.getId()); + logger.info("VectorStore with id {} deleted successfully.", store.getId()+" "); + } + + ksTextsRepository.deleteById(id); + logger.info("KSDocument with id {} deleted successfully.", id+" "); + + + logger.info("All records deleted successfully."); + } else { + if (!KSTextExists) { + logger.warn("KSDocument with id {} does not exist.", id+" "); + } else if (vectorStoreMetadataDetails.isEmpty()) { + logger.warn("No VectorStore Data available",Thread.currentThread().getName()); + } + } + } catch (Exception e) { + logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName()); + throw new RuntimeException("An error occurred while deleting records", e); + } + } } \ No newline at end of file diff --git a/src/main/java/com/olympus/apollo/services/KSIngestor.java b/src/main/java/com/olympus/apollo/services/KSIngestor.java index 993889a..6cb8c5f 100644 --- a/src/main/java/com/olympus/apollo/services/KSIngestor.java +++ b/src/main/java/com/olympus/apollo/services/KSIngestor.java @@ -164,33 +164,23 @@ public class KSIngestor { return ingestionLoopOutput; } - public IngestionOutput ingestTextById(String id) { + public IngestionOutput ingestTextById(String id,String textToBeEmbed,String KsExternalDocUniqueID) { IngestionOutput ingestionOutput= new IngestionOutput(); Optional optionalDocument = ksTextsRepository.findById(id); if (optionalDocument.isPresent()) { KSTexts ksTexts = optionalDocument.get(); - if ("NEW".equals(ksTexts.getIngestionStatus())) { - return ingestText(ksTexts); - } else { - ingestionOutput.setMessage("OOPS: TEXT is already Injected"); - return ingestionOutput; - } + return ingestText(ksTexts,textToBeEmbed,KsExternalDocUniqueID); } else { ingestionOutput.setMessage("OOPS: TEXT Not found"); return ingestionOutput; } } - private IngestionOutput ingestText(KSTexts ksTexts) { + private IngestionOutput ingestText(KSTexts ksTexts,String textToBeEmbed,String KsExternalDocUniqueID) { IngestionOutput ingestionLoopOutput = new IngestionOutput(); try { - ksTexts.setIngestionStatus("IN PROGRESS"); - ksTextsRepository.save(ksTexts); - - //TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(new Document(ksTexts.getTextToEmbed())); - Document myDoc = new Document(ksTexts.getTextToEmbed()); - + Document myDoc = new Document(textToBeEmbed); List docs = Collections.singletonList(myDoc);;//tikaDocumentReader.read(); logger.info("Ingested Text: " + ksTexts.getName()); @@ -211,12 +201,14 @@ public class KSIngestor { HashMap meta2 = new HashMap(); meta2.putAll(meta); meta2.putAll(meta1); + meta2.put("KsInternalMainEntityId",ksTexts.getId()); + meta2.put("KsExternalDocUniqueID",KsExternalDocUniqueID); for (Document splitDoc : splitDocs) { splitDoc.getMetadata().putAll(meta2); } embedtexts(splitDocs); }); - ksTexts.setIngestionStatus("INGESTED"); + //ksTexts.setIngestionStatus("INGESTED"); ksTexts.setIngestionDate(new Date()); ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date())); ksTextsRepository.save(ksTexts);