ingestion in one go
This commit is contained in:
@@ -4,12 +4,15 @@ import java.util.HashMap;
|
||||
import java.util.Date;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.olympus.apollo.dto.DeletionRequest;
|
||||
import com.olympus.apollo.dto.ExternalFileIngestionDTO;
|
||||
import com.olympus.apollo.dto.IngestionOutput;
|
||||
import com.olympus.apollo.models.KSGitInfo;
|
||||
import com.olympus.apollo.models.KSTexts;
|
||||
import com.olympus.apollo.repository.KSTextsRepository;
|
||||
import com.olympus.apollo.services.DeletionService;
|
||||
import com.olympus.apollo.services.KSIngestor;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
@@ -24,6 +27,8 @@ import com.olympus.apollo.exception.StorageFileNotFoundException;
|
||||
import com.olympus.apollo.services.StorageService;
|
||||
import com.olympus.apollo.dto.FileUploadDTO;
|
||||
|
||||
import javax.swing.text.html.Option;
|
||||
|
||||
@RestController
|
||||
public class KSFileController {
|
||||
@Autowired
|
||||
@@ -37,6 +42,8 @@ public class KSFileController {
|
||||
private KSTextsRepository ksTextsRepository;
|
||||
@Autowired
|
||||
private KSIngestor ksIngestor;
|
||||
@Autowired
|
||||
private DeletionService deletionService;
|
||||
|
||||
|
||||
@PostMapping("/upload")
|
||||
@@ -89,36 +96,44 @@ public class KSFileController {
|
||||
public String handleExternalIngestion(
|
||||
@RequestBody ExternalFileIngestionDTO externalFileIngestionDTO
|
||||
) {
|
||||
|
||||
KSTexts ksTexts = new KSTexts();
|
||||
ksTexts.setTextToEmbed(externalFileIngestionDTO.getTextToEmbed());
|
||||
//ksTexts.setTextToEmbed(externalFileIngestionDTO.getTextToEmbed());
|
||||
ksTexts.setName(externalFileIngestionDTO.getName());
|
||||
ksTexts.setDescription(externalFileIngestionDTO.getDescription());
|
||||
ksTexts.setIngestionStatus("NEW");
|
||||
ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date()));
|
||||
Optional<KSTexts> ksTextsInfoOpt= ksTextsRepository.findByName(externalFileIngestionDTO.getName());
|
||||
if(ksTextsInfoOpt.isEmpty()){
|
||||
ksTexts.setName(externalFileIngestionDTO.getName());
|
||||
ksTexts.setDescription(externalFileIngestionDTO.getDescription());
|
||||
//ksTexts.setIngestionStatus("NEW");
|
||||
ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date()));
|
||||
|
||||
Date now = new Date();
|
||||
ksTexts.setIngestionDate(now);
|
||||
Date now = new Date();
|
||||
ksTexts.setIngestionDate(now);
|
||||
|
||||
HashMap IngestionInfo = new HashMap<>();
|
||||
ksTexts.setType(externalFileIngestionDTO.getType());
|
||||
HashMap<String, String> metadata = new HashMap<>();
|
||||
metadata.put("KsApplicationName", externalFileIngestionDTO.getKsApplicationName());
|
||||
metadata.put("KsDoctype", externalFileIngestionDTO.getKsDocType());
|
||||
metadata.put("KsDocSource", externalFileIngestionDTO.getKsDocSource());
|
||||
metadata.put("KsFileSource", externalFileIngestionDTO.getName());
|
||||
IngestionInfo.put("type",externalFileIngestionDTO.getType());
|
||||
IngestionInfo.put("metadata",metadata);
|
||||
//ksTexts.setMetadata(metadata);
|
||||
ksTexts.setIngestionInfo(IngestionInfo);
|
||||
ksTexts.setDefaultChunkSize(externalFileIngestionDTO.getDefaultChunkSize());
|
||||
ksTexts.setMinChunkSize(externalFileIngestionDTO.getMinChunkSize());
|
||||
ksTexts.setMaxNumberOfChunks(externalFileIngestionDTO.getMaxNumberOfChunks());
|
||||
ksTexts.setMinChunkSizeToEmbed(externalFileIngestionDTO.getMinChunkSizeToEmbed());
|
||||
ksTexts.setAdditionalMetadata(externalFileIngestionDTO.getAdditionalMetaData());
|
||||
|
||||
HashMap IngestionInfo = new HashMap<>();
|
||||
ksTexts.setType(externalFileIngestionDTO.getType());
|
||||
HashMap<String, String> metadata = new HashMap<>();
|
||||
metadata.put("KsApplicationName", externalFileIngestionDTO.getKsApplicationName());
|
||||
metadata.put("KsDoctype", externalFileIngestionDTO.getKsDocType());
|
||||
metadata.put("KsDocSource", externalFileIngestionDTO.getKsDocSource());
|
||||
metadata.put("KsFileSource", externalFileIngestionDTO.getName());
|
||||
IngestionInfo.put("type",externalFileIngestionDTO.getType());
|
||||
IngestionInfo.put("metadata",metadata);
|
||||
//ksTexts.setMetadata(metadata);
|
||||
ksTexts.setIngestionInfo(IngestionInfo);
|
||||
ksTexts.setDefaultChunkSize(externalFileIngestionDTO.getDefaultChunkSize());
|
||||
ksTexts.setMinChunkSize(externalFileIngestionDTO.getMinChunkSize());
|
||||
ksTexts.setMaxNumberOfChunks(externalFileIngestionDTO.getMaxNumberOfChunks());
|
||||
ksTexts.setMinChunkSizeToEmbed(externalFileIngestionDTO.getMinChunkSizeToEmbed());
|
||||
ksTexts.setAdditionalMetadata(externalFileIngestionDTO.getAdditionalMetaData());
|
||||
ksTextsRepository.save(ksTexts);
|
||||
System.out.println(ksTexts.getId());
|
||||
|
||||
ksTextsRepository.save(ksTexts);
|
||||
ksIngestor.ingestTextById(ksTexts.getId(),externalFileIngestionDTO.getTextToEmbed(),externalFileIngestionDTO.getKsExternalDocUniqueId());
|
||||
}else {
|
||||
KSTexts ksTexts1 = ksTextsInfoOpt.get();
|
||||
ksIngestor.ingestTextById(ksTexts1.getId(),externalFileIngestionDTO.getTextToEmbed(),externalFileIngestionDTO.getKsExternalDocUniqueId());
|
||||
}
|
||||
|
||||
return "OK";
|
||||
}
|
||||
@@ -129,8 +144,14 @@ public class KSFileController {
|
||||
return result;
|
||||
}
|
||||
|
||||
@GetMapping("/ingest_texts/{id}")
|
||||
@DeleteMapping("/deleteTextRecords/{id}")
|
||||
public ResponseEntity<String> deleteTextRecords(@PathVariable String id){
|
||||
deletionService.deleteTextRecords(id);
|
||||
return ResponseEntity.ok("Request In Working");
|
||||
}
|
||||
|
||||
/*@GetMapping("/ingest_texts/{id}")
|
||||
public IngestionOutput ingestDocumentById(@PathVariable String id) {
|
||||
return ksIngestor.ingestTextById(id);
|
||||
}
|
||||
}*/
|
||||
}
|
||||
@@ -20,4 +20,5 @@ public class ExternalFileIngestionDTO {
|
||||
private int maxNumberOfChunks;
|
||||
private int minChunkSizeToEmbed;
|
||||
private HashMap additionalMetaData;
|
||||
private String ksExternalDocUniqueId;
|
||||
}
|
||||
|
||||
@@ -2,14 +2,20 @@ package com.olympus.apollo.repository;
|
||||
|
||||
|
||||
import com.olympus.apollo.models.KSDocument;
|
||||
import com.olympus.apollo.models.KSGitInfo;
|
||||
import com.olympus.apollo.models.KSTexts;
|
||||
import org.springframework.data.mongodb.repository.MongoRepository;
|
||||
import org.springframework.data.mongodb.repository.Query;
|
||||
import org.springframework.data.rest.core.annotation.RepositoryRestResource;
|
||||
import org.springframework.web.bind.annotation.CrossOrigin;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
@RepositoryRestResource(collectionResourceRel = "ksinternal", path = "ksinternal")
|
||||
@CrossOrigin
|
||||
public interface KSTextsRepository extends MongoRepository<KSTexts, String> {
|
||||
|
||||
public Iterable<KSDocument> findAllByIngestionStatus(String status);
|
||||
@Query("{'name': ?0}")
|
||||
Optional<KSTexts> findByName(String name);
|
||||
|
||||
}
|
||||
|
||||
@@ -32,6 +32,9 @@ public interface VectorStoreRepository extends MongoRepository<VectorStore, Stri
|
||||
@Query("{'metadata.filePath': ?0}")
|
||||
Optional<VectorStore> findByFilePath(String filePath);
|
||||
|
||||
@Query("{'metadata.KsInternalMainEntityId': ?0}")
|
||||
List<VectorStore> findByKsInternalMainEntityId(String filePath);
|
||||
|
||||
@Query("{'metadata.KsApplicationName': ?0, 'metadata.KsBranch': ?1, 'metadata.filePath': ?2}")
|
||||
Optional<VectorStore> findByKsapplicationNameKsBranchFilePath(String ksApplicationName,String ksBranch,String filePath);
|
||||
}
|
||||
|
||||
@@ -18,6 +18,8 @@ import org.springframework.messaging.simp.SimpMessagingTemplate;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
@@ -30,6 +32,9 @@ public class DeletionService {
|
||||
@Autowired
|
||||
private KSDocumentRepository ksDocumentRepository;
|
||||
|
||||
@Autowired
|
||||
private KSTextsRepository ksTextsRepository;
|
||||
|
||||
@Autowired
|
||||
private KSIngestionInfoRepository ksIngestionInfoRepository;
|
||||
|
||||
@@ -225,4 +230,37 @@ public class DeletionService {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Async("asyncTaskExecutor")
|
||||
public void deleteTextRecords(String id) {
|
||||
try {
|
||||
boolean KSTextExists = ksTextsRepository.existsById(id);
|
||||
|
||||
List<VectorStore> vectorStoreMetadataDetails = vectorStoreRepository.findByKsInternalMainEntityId(id);
|
||||
|
||||
if (KSTextExists && !vectorStoreMetadataDetails.isEmpty()) {
|
||||
for (VectorStore store : vectorStoreMetadataDetails) {
|
||||
vectorStoreRepository.deleteById(store.getId());
|
||||
logger.info("VectorStore with id {} deleted successfully.", store.getId()+" ");
|
||||
}
|
||||
|
||||
ksTextsRepository.deleteById(id);
|
||||
logger.info("KSDocument with id {} deleted successfully.", id+" ");
|
||||
|
||||
|
||||
logger.info("All records deleted successfully.");
|
||||
} else {
|
||||
if (!KSTextExists) {
|
||||
logger.warn("KSDocument with id {} does not exist.", id+" ");
|
||||
} else if (vectorStoreMetadataDetails.isEmpty()) {
|
||||
logger.warn("No VectorStore Data available",Thread.currentThread().getName());
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName());
|
||||
throw new RuntimeException("An error occurred while deleting records", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -164,33 +164,23 @@ public class KSIngestor {
|
||||
return ingestionLoopOutput;
|
||||
}
|
||||
|
||||
public IngestionOutput ingestTextById(String id) {
|
||||
public IngestionOutput ingestTextById(String id,String textToBeEmbed,String KsExternalDocUniqueID) {
|
||||
IngestionOutput ingestionOutput= new IngestionOutput();
|
||||
Optional<KSTexts> optionalDocument = ksTextsRepository.findById(id);
|
||||
if (optionalDocument.isPresent()) {
|
||||
KSTexts ksTexts = optionalDocument.get();
|
||||
if ("NEW".equals(ksTexts.getIngestionStatus())) {
|
||||
return ingestText(ksTexts);
|
||||
} else {
|
||||
ingestionOutput.setMessage("OOPS: TEXT is already Injected");
|
||||
return ingestionOutput;
|
||||
}
|
||||
return ingestText(ksTexts,textToBeEmbed,KsExternalDocUniqueID);
|
||||
} else {
|
||||
ingestionOutput.setMessage("OOPS: TEXT Not found");
|
||||
return ingestionOutput;
|
||||
}
|
||||
}
|
||||
|
||||
private IngestionOutput ingestText(KSTexts ksTexts) {
|
||||
private IngestionOutput ingestText(KSTexts ksTexts,String textToBeEmbed,String KsExternalDocUniqueID) {
|
||||
IngestionOutput ingestionLoopOutput = new IngestionOutput();
|
||||
try {
|
||||
ksTexts.setIngestionStatus("IN PROGRESS");
|
||||
ksTextsRepository.save(ksTexts);
|
||||
|
||||
|
||||
//TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(new Document(ksTexts.getTextToEmbed()));
|
||||
Document myDoc = new Document(ksTexts.getTextToEmbed());
|
||||
|
||||
Document myDoc = new Document(textToBeEmbed);
|
||||
|
||||
List<Document> docs = Collections.singletonList(myDoc);;//tikaDocumentReader.read();
|
||||
logger.info("Ingested Text: " + ksTexts.getName());
|
||||
@@ -211,12 +201,14 @@ public class KSIngestor {
|
||||
HashMap meta2 = new HashMap();
|
||||
meta2.putAll(meta);
|
||||
meta2.putAll(meta1);
|
||||
meta2.put("KsInternalMainEntityId",ksTexts.getId());
|
||||
meta2.put("KsExternalDocUniqueID",KsExternalDocUniqueID);
|
||||
for (Document splitDoc : splitDocs) {
|
||||
splitDoc.getMetadata().putAll(meta2);
|
||||
}
|
||||
embedtexts(splitDocs);
|
||||
});
|
||||
ksTexts.setIngestionStatus("INGESTED");
|
||||
//ksTexts.setIngestionStatus("INGESTED");
|
||||
ksTexts.setIngestionDate(new Date());
|
||||
ksTexts.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date()));
|
||||
ksTextsRepository.save(ksTexts);
|
||||
|
||||
Reference in New Issue
Block a user