ingestion in one go

This commit is contained in:
sumedh
2024-10-04 20:31:37 +05:30
parent 962d135bdc
commit 1c316767f4
6 changed files with 103 additions and 42 deletions

View File

@@ -4,12 +4,15 @@ import java.util.HashMap;
import java.util.Date;
import java.text.SimpleDateFormat;
import java.util.List;
import java.util.Optional;
import com.olympus.apollo.dto.DeletionRequest;
import com.olympus.apollo.dto.ExternalFileIngestionDTO;
import com.olympus.apollo.dto.IngestionOutput;
import com.olympus.apollo.models.KSGitInfo;
import com.olympus.apollo.models.KSTexts;
import com.olympus.apollo.repository.KSTextsRepository;
import com.olympus.apollo.services.DeletionService;
import com.olympus.apollo.services.KSIngestor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
@@ -24,6 +27,8 @@ import com.olympus.apollo.exception.StorageFileNotFoundException;
import com.olympus.apollo.services.StorageService;
import com.olympus.apollo.dto.FileUploadDTO;
import javax.swing.text.html.Option;
@RestController
public class KSFileController {
@Autowired
@@ -37,6 +42,8 @@ public class KSFileController {
private KSTextsRepository ksTextsRepository;
@Autowired
private KSIngestor ksIngestor;
@Autowired
private DeletionService deletionService;
@PostMapping("/upload")
@@ -89,36 +96,44 @@ public class KSFileController {
public String handleExternalIngestion(
@RequestBody ExternalFileIngestionDTO externalFileIngestionDTO
) {
KSTexts ksTexts = new KSTexts();
ksTexts.setTextToEmbed(externalFileIngestionDTO.getTextToEmbed());
//ksTexts.setTextToEmbed(externalFileIngestionDTO.getTextToEmbed());
ksTexts.setName(externalFileIngestionDTO.getName());
ksTexts.setDescription(externalFileIngestionDTO.getDescription());
ksTexts.setIngestionStatus("NEW");
ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date()));
Optional<KSTexts> ksTextsInfoOpt= ksTextsRepository.findByName(externalFileIngestionDTO.getName());
if(ksTextsInfoOpt.isEmpty()){
ksTexts.setName(externalFileIngestionDTO.getName());
ksTexts.setDescription(externalFileIngestionDTO.getDescription());
//ksTexts.setIngestionStatus("NEW");
ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date()));
Date now = new Date();
ksTexts.setIngestionDate(now);
Date now = new Date();
ksTexts.setIngestionDate(now);
HashMap IngestionInfo = new HashMap<>();
ksTexts.setType(externalFileIngestionDTO.getType());
HashMap<String, String> metadata = new HashMap<>();
metadata.put("KsApplicationName", externalFileIngestionDTO.getKsApplicationName());
metadata.put("KsDoctype", externalFileIngestionDTO.getKsDocType());
metadata.put("KsDocSource", externalFileIngestionDTO.getKsDocSource());
metadata.put("KsFileSource", externalFileIngestionDTO.getName());
IngestionInfo.put("type",externalFileIngestionDTO.getType());
IngestionInfo.put("metadata",metadata);
//ksTexts.setMetadata(metadata);
ksTexts.setIngestionInfo(IngestionInfo);
ksTexts.setDefaultChunkSize(externalFileIngestionDTO.getDefaultChunkSize());
ksTexts.setMinChunkSize(externalFileIngestionDTO.getMinChunkSize());
ksTexts.setMaxNumberOfChunks(externalFileIngestionDTO.getMaxNumberOfChunks());
ksTexts.setMinChunkSizeToEmbed(externalFileIngestionDTO.getMinChunkSizeToEmbed());
ksTexts.setAdditionalMetadata(externalFileIngestionDTO.getAdditionalMetaData());
HashMap IngestionInfo = new HashMap<>();
ksTexts.setType(externalFileIngestionDTO.getType());
HashMap<String, String> metadata = new HashMap<>();
metadata.put("KsApplicationName", externalFileIngestionDTO.getKsApplicationName());
metadata.put("KsDoctype", externalFileIngestionDTO.getKsDocType());
metadata.put("KsDocSource", externalFileIngestionDTO.getKsDocSource());
metadata.put("KsFileSource", externalFileIngestionDTO.getName());
IngestionInfo.put("type",externalFileIngestionDTO.getType());
IngestionInfo.put("metadata",metadata);
//ksTexts.setMetadata(metadata);
ksTexts.setIngestionInfo(IngestionInfo);
ksTexts.setDefaultChunkSize(externalFileIngestionDTO.getDefaultChunkSize());
ksTexts.setMinChunkSize(externalFileIngestionDTO.getMinChunkSize());
ksTexts.setMaxNumberOfChunks(externalFileIngestionDTO.getMaxNumberOfChunks());
ksTexts.setMinChunkSizeToEmbed(externalFileIngestionDTO.getMinChunkSizeToEmbed());
ksTexts.setAdditionalMetadata(externalFileIngestionDTO.getAdditionalMetaData());
ksTextsRepository.save(ksTexts);
System.out.println(ksTexts.getId());
ksTextsRepository.save(ksTexts);
ksIngestor.ingestTextById(ksTexts.getId(),externalFileIngestionDTO.getTextToEmbed(),externalFileIngestionDTO.getKsExternalDocUniqueId());
}else {
KSTexts ksTexts1 = ksTextsInfoOpt.get();
ksIngestor.ingestTextById(ksTexts1.getId(),externalFileIngestionDTO.getTextToEmbed(),externalFileIngestionDTO.getKsExternalDocUniqueId());
}
return "OK";
}
@@ -129,8 +144,14 @@ public class KSFileController {
return result;
}
@GetMapping("/ingest_texts/{id}")
@DeleteMapping("/deleteTextRecords/{id}")
public ResponseEntity<String> deleteTextRecords(@PathVariable String id){
deletionService.deleteTextRecords(id);
return ResponseEntity.ok("Request In Working");
}
/*@GetMapping("/ingest_texts/{id}")
public IngestionOutput ingestDocumentById(@PathVariable String id) {
return ksIngestor.ingestTextById(id);
}
}*/
}

View File

@@ -20,4 +20,5 @@ public class ExternalFileIngestionDTO {
private int maxNumberOfChunks;
private int minChunkSizeToEmbed;
private HashMap additionalMetaData;
private String ksExternalDocUniqueId;
}

View File

@@ -2,14 +2,20 @@ package com.olympus.apollo.repository;
import com.olympus.apollo.models.KSDocument;
import com.olympus.apollo.models.KSGitInfo;
import com.olympus.apollo.models.KSTexts;
import org.springframework.data.mongodb.repository.MongoRepository;
import org.springframework.data.mongodb.repository.Query;
import org.springframework.data.rest.core.annotation.RepositoryRestResource;
import org.springframework.web.bind.annotation.CrossOrigin;
import java.util.Optional;
@RepositoryRestResource(collectionResourceRel = "ksinternal", path = "ksinternal")
@CrossOrigin
public interface KSTextsRepository extends MongoRepository<KSTexts, String> {
public Iterable<KSDocument> findAllByIngestionStatus(String status);
@Query("{'name': ?0}")
Optional<KSTexts> findByName(String name);
}

View File

@@ -32,6 +32,9 @@ public interface VectorStoreRepository extends MongoRepository<VectorStore, Stri
@Query("{'metadata.filePath': ?0}")
Optional<VectorStore> findByFilePath(String filePath);
@Query("{'metadata.KsInternalMainEntityId': ?0}")
List<VectorStore> findByKsInternalMainEntityId(String filePath);
@Query("{'metadata.KsApplicationName': ?0, 'metadata.KsBranch': ?1, 'metadata.filePath': ?2}")
Optional<VectorStore> findByKsapplicationNameKsBranchFilePath(String ksApplicationName,String ksBranch,String filePath);
}

View File

@@ -18,6 +18,8 @@ import org.springframework.messaging.simp.SimpMessagingTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
@@ -30,6 +32,9 @@ public class DeletionService {
@Autowired
private KSDocumentRepository ksDocumentRepository;
@Autowired
private KSTextsRepository ksTextsRepository;
@Autowired
private KSIngestionInfoRepository ksIngestionInfoRepository;
@@ -225,4 +230,37 @@ public class DeletionService {
}
}
}
@Async("asyncTaskExecutor")
public void deleteTextRecords(String id) {
try {
boolean KSTextExists = ksTextsRepository.existsById(id);
List<VectorStore> vectorStoreMetadataDetails = vectorStoreRepository.findByKsInternalMainEntityId(id);
if (KSTextExists && !vectorStoreMetadataDetails.isEmpty()) {
for (VectorStore store : vectorStoreMetadataDetails) {
vectorStoreRepository.deleteById(store.getId());
logger.info("VectorStore with id {} deleted successfully.", store.getId()+" ");
}
ksTextsRepository.deleteById(id);
logger.info("KSDocument with id {} deleted successfully.", id+" ");
logger.info("All records deleted successfully.");
} else {
if (!KSTextExists) {
logger.warn("KSDocument with id {} does not exist.", id+" ");
} else if (vectorStoreMetadataDetails.isEmpty()) {
logger.warn("No VectorStore Data available",Thread.currentThread().getName());
}
}
} catch (Exception e) {
logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName());
throw new RuntimeException("An error occurred while deleting records", e);
}
}
}

View File

@@ -164,33 +164,23 @@ public class KSIngestor {
return ingestionLoopOutput;
}
public IngestionOutput ingestTextById(String id) {
public IngestionOutput ingestTextById(String id,String textToBeEmbed,String KsExternalDocUniqueID) {
IngestionOutput ingestionOutput= new IngestionOutput();
Optional<KSTexts> optionalDocument = ksTextsRepository.findById(id);
if (optionalDocument.isPresent()) {
KSTexts ksTexts = optionalDocument.get();
if ("NEW".equals(ksTexts.getIngestionStatus())) {
return ingestText(ksTexts);
} else {
ingestionOutput.setMessage("OOPS: TEXT is already Injected");
return ingestionOutput;
}
return ingestText(ksTexts,textToBeEmbed,KsExternalDocUniqueID);
} else {
ingestionOutput.setMessage("OOPS: TEXT Not found");
return ingestionOutput;
}
}
private IngestionOutput ingestText(KSTexts ksTexts) {
private IngestionOutput ingestText(KSTexts ksTexts,String textToBeEmbed,String KsExternalDocUniqueID) {
IngestionOutput ingestionLoopOutput = new IngestionOutput();
try {
ksTexts.setIngestionStatus("IN PROGRESS");
ksTextsRepository.save(ksTexts);
//TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(new Document(ksTexts.getTextToEmbed()));
Document myDoc = new Document(ksTexts.getTextToEmbed());
Document myDoc = new Document(textToBeEmbed);
List<Document> docs = Collections.singletonList(myDoc);;//tikaDocumentReader.read();
logger.info("Ingested Text: " + ksTexts.getName());
@@ -211,12 +201,14 @@ public class KSIngestor {
HashMap meta2 = new HashMap();
meta2.putAll(meta);
meta2.putAll(meta1);
meta2.put("KsInternalMainEntityId",ksTexts.getId());
meta2.put("KsExternalDocUniqueID",KsExternalDocUniqueID);
for (Document splitDoc : splitDocs) {
splitDoc.getMetadata().putAll(meta2);
}
embedtexts(splitDocs);
});
ksTexts.setIngestionStatus("INGESTED");
//ksTexts.setIngestionStatus("INGESTED");
ksTexts.setIngestionDate(new Date());
ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date()));
ksTextsRepository.save(ksTexts);