use azure search
This commit is contained in:
20
pom.xml
20
pom.xml
@@ -58,7 +58,7 @@
|
|||||||
<artifactId>spring-boot-starter-data-rest</artifactId>
|
<artifactId>spring-boot-starter-data-rest</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<!--<dependency>
|
||||||
<groupId>org.springframework.ai</groupId>
|
<groupId>org.springframework.ai</groupId>
|
||||||
<artifactId>spring-ai-mongodb-atlas-store-spring-boot-starter</artifactId>
|
<artifactId>spring-ai-mongodb-atlas-store-spring-boot-starter</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
@@ -67,11 +67,23 @@
|
|||||||
<groupId>org.springframework.ai</groupId>
|
<groupId>org.springframework.ai</groupId>
|
||||||
<artifactId>spring-ai-openai-spring-boot-starter</artifactId>
|
<artifactId>spring-ai-openai-spring-boot-starter</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
<artifactId>spring-ai-azure-openai-spring-boot-starter</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
<artifactId>spring-ai-azure-store</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.cloud</groupId>
|
<groupId>org.springframework.cloud</groupId>
|
||||||
<artifactId>spring-cloud-starter-netflix-eureka-client</artifactId>
|
<artifactId>spring-cloud-starter-netflix-eureka-client</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.cloud</groupId>
|
<groupId>org.springframework.cloud</groupId>
|
||||||
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
||||||
@@ -87,6 +99,12 @@
|
|||||||
<version>1.0.0-SNAPSHOT</version>
|
<version>1.0.0-SNAPSHOT</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springdoc</groupId>
|
||||||
|
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||||
|
<version>2.5.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.projectlombok</groupId>
|
<groupId>org.projectlombok</groupId>
|
||||||
<artifactId>lombok</artifactId>
|
<artifactId>lombok</artifactId>
|
||||||
|
|||||||
@@ -1,10 +1,18 @@
|
|||||||
package com.olympus.apollo.config;
|
package com.olympus.apollo.config;
|
||||||
|
|
||||||
|
import com.azure.core.credential.AzureKeyCredential;
|
||||||
|
import com.azure.search.documents.indexes.SearchIndexClient;
|
||||||
|
import com.azure.search.documents.indexes.SearchIndexClientBuilder;
|
||||||
|
import org.springframework.ai.azure.openai.AzureOpenAiEmbeddingModel;
|
||||||
import org.springframework.ai.embedding.EmbeddingModel;
|
import org.springframework.ai.embedding.EmbeddingModel;
|
||||||
import org.springframework.ai.openai.OpenAiEmbeddingModel;
|
/*import org.springframework.ai.openai.OpenAiEmbeddingModel;
|
||||||
import org.springframework.ai.openai.api.OpenAiApi;
|
import org.springframework.ai.openai.api.OpenAiApi;
|
||||||
import org.springframework.ai.vectorstore.MongoDBAtlasVectorStore;
|
import org.springframework.ai.vectorstore.MongoDBAtlasVectorStore;*/
|
||||||
|
|
||||||
import org.springframework.ai.vectorstore.VectorStore;
|
import org.springframework.ai.vectorstore.VectorStore;
|
||||||
|
import org.springframework.ai.vectorstore.azure.AzureVectorStore;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.beans.factory.annotation.Qualifier;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.boot.SpringBootConfiguration;
|
import org.springframework.boot.SpringBootConfiguration;
|
||||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||||
@@ -12,34 +20,39 @@ import org.springframework.context.annotation.Bean;
|
|||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
import org.springframework.data.mongodb.core.MongoTemplate;
|
import org.springframework.data.mongodb.core.MongoTemplate;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
@Configuration
|
@Configuration
|
||||||
@SpringBootConfiguration
|
|
||||||
@EnableAutoConfiguration
|
|
||||||
public class EmbeddingConfig {
|
public class EmbeddingConfig {
|
||||||
@Value("${spring.ai.openai.api-key}")
|
|
||||||
private String openAiKey;
|
|
||||||
@Value("${spring.data.mongodb.database}")
|
@Value("${spring.ai.vectorstore.azure.api-key}")
|
||||||
private String databaseName;
|
private String azureKey;
|
||||||
@Value("${spring.ai.vectorstore.mongodb.collection-name:vector_store}")
|
@Value("${spring.ai.vectorstore.azure.url}")
|
||||||
private String collectionName;
|
private String azureEndpoint;
|
||||||
@Value("${spring.ai.vectorstore.mongodb.indexName:vector_index}")
|
@Value("${spring.ai.vectorstore.azure.initialize-schema}")
|
||||||
private String indexName;
|
private boolean initSchema;
|
||||||
@Value("${spring.data.mongodb.uri}")
|
|
||||||
private String mongoUri;
|
|
||||||
@Value("${spring.ai.vectorstore.mongodb.initialize-schema}")
|
|
||||||
private Boolean initSchema;
|
|
||||||
// Add beans here...
|
|
||||||
|
|
||||||
@Bean
|
@Bean
|
||||||
public EmbeddingModel embeddingModel() {
|
public SearchIndexClient searchIndexClient() {
|
||||||
return new OpenAiEmbeddingModel(new OpenAiApi(openAiKey));
|
return new SearchIndexClientBuilder().endpoint(azureEndpoint)
|
||||||
|
.credential(new AzureKeyCredential(azureKey))
|
||||||
|
.buildClient();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Bean
|
@Bean
|
||||||
public VectorStore mongodbVectorStore(MongoTemplate mongoTemplate, EmbeddingModel embeddingModel) {
|
public VectorStore vectorStore(SearchIndexClient searchIndexClient, @Qualifier("azureOpenAiEmbeddingModel") EmbeddingModel embeddingModel) {
|
||||||
return new MongoDBAtlasVectorStore(mongoTemplate, embeddingModel,
|
List<AzureVectorStore.MetadataField> fields = new ArrayList<>();
|
||||||
MongoDBAtlasVectorStore.MongoDBVectorStoreConfig.builder().build(), initSchema);
|
|
||||||
|
fields.add(AzureVectorStore.MetadataField.text("KsApplicationName"));
|
||||||
|
fields.add(AzureVectorStore.MetadataField.text("KsProjectName"));
|
||||||
|
fields.add(AzureVectorStore.MetadataField.text("KsDoctype"));
|
||||||
|
fields.add(AzureVectorStore.MetadataField.text("KsDocSource"));
|
||||||
|
fields.add(AzureVectorStore.MetadataField.text("KsFileSource"));
|
||||||
|
fields.add(AzureVectorStore.MetadataField.text("KsDocumentId"));
|
||||||
|
|
||||||
|
return new AzureVectorStore(searchIndexClient, embeddingModel,initSchema, fields);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -59,7 +59,6 @@ public class KSFileController {
|
|||||||
Date now = new Date();
|
Date now = new Date();
|
||||||
ksDocument.setIngestionDate(now);
|
ksDocument.setIngestionDate(now);
|
||||||
|
|
||||||
|
|
||||||
KSIngestionInfo ksIngestionInfo = new KSIngestionInfo();
|
KSIngestionInfo ksIngestionInfo = new KSIngestionInfo();
|
||||||
ksIngestionInfo.setType(fileUploadDTO.getType()); // != null ? type : "MD_DOCUMENT"
|
ksIngestionInfo.setType(fileUploadDTO.getType()); // != null ? type : "MD_DOCUMENT"
|
||||||
|
|
||||||
@@ -68,6 +67,7 @@ public class KSFileController {
|
|||||||
metadata.put("KsDoctype", fileUploadDTO.getKsDocType());
|
metadata.put("KsDoctype", fileUploadDTO.getKsDocType());
|
||||||
metadata.put("KsDocSource", fileUploadDTO.getKsDocSource());
|
metadata.put("KsDocSource", fileUploadDTO.getKsDocSource());
|
||||||
metadata.put("KsFileSource", file.getOriginalFilename());
|
metadata.put("KsFileSource", file.getOriginalFilename());
|
||||||
|
metadata.put("KsProjectName", fileUploadDTO.getKsProjectName());
|
||||||
|
|
||||||
ksIngestionInfo.setMetadata(metadata);
|
ksIngestionInfo.setMetadata(metadata);
|
||||||
ksIngestionInfo.setDefaultChunkSize(fileUploadDTO.getDefaultChunkSize());
|
ksIngestionInfo.setDefaultChunkSize(fileUploadDTO.getDefaultChunkSize());
|
||||||
@@ -75,7 +75,7 @@ public class KSFileController {
|
|||||||
ksIngestionInfo.setMaxNumberOfChunks(fileUploadDTO.getMaxNumberOfChunks());
|
ksIngestionInfo.setMaxNumberOfChunks(fileUploadDTO.getMaxNumberOfChunks());
|
||||||
ksIngestionInfo.setMinChunkSizeToEmbed(fileUploadDTO.getMinChunkSizeToEmbed());
|
ksIngestionInfo.setMinChunkSizeToEmbed(fileUploadDTO.getMinChunkSizeToEmbed());
|
||||||
|
|
||||||
ksIngestionInfoRepository.save(ksIngestionInfo);
|
//ksIngestionInfoRepository.save(ksIngestionInfo);
|
||||||
ksDocument.setIngestionInfo(ksIngestionInfo);
|
ksDocument.setIngestionInfo(ksIngestionInfo);
|
||||||
ksDocumentREpository.save(ksDocument);
|
ksDocumentREpository.save(ksDocument);
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ public class CorsConfig implements WebMvcConfigurer {
|
|||||||
public void addCorsMappings(CorsRegistry registry) {
|
public void addCorsMappings(CorsRegistry registry) {
|
||||||
registry.addMapping("/**")
|
registry.addMapping("/**")
|
||||||
.allowedOrigins(apollo_frontend_url)
|
.allowedOrigins(apollo_frontend_url)
|
||||||
|
.allowedOriginPatterns("**")
|
||||||
.allowedHeaders("*")
|
.allowedHeaders("*")
|
||||||
.allowedMethods("GET", "POST", "PUT", "DELETE","OPTIONS");
|
.allowedMethods("GET", "POST", "PUT", "DELETE","OPTIONS");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,12 +9,15 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.olympus.dto.DeletionRequest;
|
import com.olympus.dto.DeletionRequest;
|
||||||
import com.olympus.model.apollo.VectorStore;
|
import org.springframework.ai.document.Document;
|
||||||
|
import org.springframework.ai.vectorstore.SearchRequest;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.messaging.simp.SimpMessagingTemplate;
|
import org.springframework.messaging.simp.SimpMessagingTemplate;
|
||||||
import org.springframework.scheduling.annotation.Async;
|
import org.springframework.scheduling.annotation.Async;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.ai.vectorstore.VectorStore;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.concurrent.CompletableFuture;
|
import java.util.concurrent.CompletableFuture;
|
||||||
@@ -45,41 +48,33 @@ public class DeletionService {
|
|||||||
@Autowired
|
@Autowired
|
||||||
private SimpMessagingTemplate simpMessagingTemplate;
|
private SimpMessagingTemplate simpMessagingTemplate;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private VectorStore vectorStore;
|
||||||
|
|
||||||
@Async("asyncTaskExecutor")
|
@Async("asyncTaskExecutor")
|
||||||
public void deleteRecords(DeletionRequest deletionRequest) {
|
public void deleteRecords(DeletionRequest deletionRequest) {
|
||||||
try {
|
try {
|
||||||
boolean KSDocumentExists = deletionRequest.getKsDocumentId() != null && !deletionRequest.getKsDocumentId().isEmpty() && ksDocumentRepository.existsById(deletionRequest.getKsDocumentId());
|
|
||||||
boolean KSIngestionInfoExists = deletionRequest.getKsIngestionInfoId() != null && !deletionRequest.getKsIngestionInfoId().isEmpty() && ksIngestionInfoRepository.existsById(deletionRequest.getKsIngestionInfoId());
|
//TODO: COMPLETE REFACTOR REQUIRED TO DELETE RECORD FROM AZURE SEARCH
|
||||||
boolean vectorStoreExists = deletionRequest.getKsApplicationName() != null && deletionRequest.getKsDocSource() != null && deletionRequest.getKsFileSource() != null && deletionRequest.getKsDoctype() != null;
|
// NOT WORKING AT THE MOMENT
|
||||||
|
boolean KSDocumentExists = deletionRequest.getKsDocumentId() != null &&
|
||||||
|
!deletionRequest.getKsDocumentId().isEmpty() &&
|
||||||
|
ksDocumentRepository.existsById(deletionRequest.getKsDocumentId());
|
||||||
|
if(KSDocumentExists){
|
||||||
|
SearchRequest searchRequest = SearchRequest.defaults()
|
||||||
|
.withQuery("a").withTopK(1000)
|
||||||
|
.withSimilarityThreshold(0.0)
|
||||||
|
.withFilterExpression("KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'");
|
||||||
|
|
||||||
|
|
||||||
List<VectorStore> vectorStoreMetadataDetails = vectorStoreExists ? vectorStoreRepository.findDocumentVectorByMetadata(deletionRequest.getKsDoctype(), deletionRequest.getKsDocSource(), deletionRequest.getKsFileSource(), deletionRequest.getKsApplicationName()) : List.of();
|
List<Document> docs = vectorStore.similaritySearch(searchRequest);
|
||||||
|
List<String> ids = docs.stream().map(Document::getId).toList();
|
||||||
|
vectorStore.delete(ids);
|
||||||
|
|
||||||
if (KSDocumentExists && KSIngestionInfoExists && !vectorStoreMetadataDetails.isEmpty()) {
|
ksDocumentRepository.deleteById(deletionRequest.getKsDocumentId());
|
||||||
if (deletionRequest.getKsDocumentId() != null && !deletionRequest.getKsDocumentId().isEmpty()) {
|
logger.info("KSDocument with id {} deleted successfully.", deletionRequest.getKsDocumentId());
|
||||||
ksDocumentRepository.deleteById(deletionRequest.getKsDocumentId());
|
}else{
|
||||||
logger.info("KSDocument with id {} deleted successfully.", deletionRequest.getKsDocumentId()+" "+Thread.currentThread().getName());
|
logger.warn("KSDocument with id {} does not exist.", deletionRequest.getKsDocumentId());
|
||||||
}
|
|
||||||
|
|
||||||
if (deletionRequest.getKsIngestionInfoId() != null && !deletionRequest.getKsIngestionInfoId().isEmpty()) {
|
|
||||||
ksIngestionInfoRepository.deleteById(deletionRequest.getKsIngestionInfoId());
|
|
||||||
logger.info("KSIngestionInfo with id {} deleted successfully.", deletionRequest.getKsIngestionInfoId()+" "+Thread.currentThread().getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (VectorStore store : vectorStoreMetadataDetails) {
|
|
||||||
vectorStoreRepository.deleteById(store.getId());
|
|
||||||
logger.info("VectorStore with id {} deleted successfully.", store.getId()+" "+Thread.currentThread().getName());
|
|
||||||
}
|
|
||||||
logger.info("All records deleted successfully.");
|
|
||||||
} else {
|
|
||||||
if (!KSDocumentExists) {
|
|
||||||
logger.warn("KSDocument with id {} does not exist.", deletionRequest.getKsDocumentId()+" "+Thread.currentThread().getName());
|
|
||||||
} else if (!KSIngestionInfoExists) {
|
|
||||||
logger.warn("KSIngestionInfo with id {} does not exist.", deletionRequest.getKsIngestionInfoId()+" "+Thread.currentThread().getName());
|
|
||||||
} else if (vectorStoreMetadataDetails.isEmpty()) {
|
|
||||||
logger.warn("No VectorStore Data available",Thread.currentThread().getName());
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName());
|
logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName());
|
||||||
@@ -122,9 +117,9 @@ public class DeletionService {
|
|||||||
String ingestionStatus = ksGitInfo.getIngestionStatus();
|
String ingestionStatus = ksGitInfo.getIngestionStatus();
|
||||||
logger.info("Ingestion Status is {}.", ingestionStatus);
|
logger.info("Ingestion Status is {}.", ingestionStatus);
|
||||||
|
|
||||||
List<VectorStore> vectorStoreMetadataDetails = vectorStoreGitDetailsExists
|
List<VectorStore> vectorStoreMetadataDetails = null; /*vectorStoreGitDetailsExists
|
||||||
? vectorStoreRepository.findGitVectorByMetadata(ksDoctype,ksDocSource, ksFileSource, applicationName, ksBranch)
|
? vectorStoreRepository.findGitVectorByMetadata(ksDoctype,ksDocSource, ksFileSource, applicationName, ksBranch)
|
||||||
: List.of();
|
: List.of();*/
|
||||||
|
|
||||||
if (KSGitInfoExists && KSGitIngestionInfoExists) {
|
if (KSGitInfoExists && KSGitIngestionInfoExists) {
|
||||||
deleteRecordsBasedOnIngestionStatus(ksGitInfoId,ksBranch,ingestionStatus,ksGitIngestionInfoId,vectorStoreMetadataDetails,applicationName);
|
deleteRecordsBasedOnIngestionStatus(ksGitInfoId,ksBranch,ingestionStatus,ksGitIngestionInfoId,vectorStoreMetadataDetails,applicationName);
|
||||||
@@ -218,11 +213,12 @@ public class DeletionService {
|
|||||||
|
|
||||||
private void deleteVectorStores(List<VectorStore> vectorStoreMetadataDetails, String applicationName){
|
private void deleteVectorStores(List<VectorStore> vectorStoreMetadataDetails, String applicationName){
|
||||||
if(!vectorStoreMetadataDetails.isEmpty()){
|
if(!vectorStoreMetadataDetails.isEmpty()){
|
||||||
for (VectorStore store : vectorStoreMetadataDetails) {
|
|
||||||
|
/* for (VectorStore store : vectorStoreMetadataDetails) {
|
||||||
String storeId=store.getId();
|
String storeId=store.getId();
|
||||||
vectorStoreRepository.deleteById(storeId);
|
vectorStoreRepository.deleteById(storeId);
|
||||||
logger.info("VectorStore with id {} deleted successfully.", applicationName, storeId);
|
logger.info("VectorStore with id {} deleted successfully.", applicationName, storeId);
|
||||||
}
|
}*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -233,7 +229,8 @@ public class DeletionService {
|
|||||||
try {
|
try {
|
||||||
boolean KSTextExists = ksTextsRepository.existsById(id);
|
boolean KSTextExists = ksTextsRepository.existsById(id);
|
||||||
|
|
||||||
List<VectorStore> vectorStoreMetadataDetails = vectorStoreRepository.findByKsInternalMainEntityId(id);
|
/*
|
||||||
|
List<Object> vectorStoreMetadataDetails = vectorStoreRepository.findByKsInternalMainEntityId(id);
|
||||||
|
|
||||||
if (KSTextExists && !vectorStoreMetadataDetails.isEmpty()) {
|
if (KSTextExists && !vectorStoreMetadataDetails.isEmpty()) {
|
||||||
for (VectorStore store : vectorStoreMetadataDetails) {
|
for (VectorStore store : vectorStoreMetadataDetails) {
|
||||||
@@ -252,7 +249,7 @@ public class DeletionService {
|
|||||||
} else if (vectorStoreMetadataDetails.isEmpty()) {
|
} else if (vectorStoreMetadataDetails.isEmpty()) {
|
||||||
logger.warn("No VectorStore Data available",Thread.currentThread().getName());
|
logger.warn("No VectorStore Data available",Thread.currentThread().getName());
|
||||||
}
|
}
|
||||||
}
|
}*/
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName());
|
logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName());
|
||||||
throw new RuntimeException("An error occurred while deleting records", e);
|
throw new RuntimeException("An error occurred while deleting records", e);
|
||||||
|
|||||||
@@ -28,15 +28,17 @@ public class KSIngestor {
|
|||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private KSDocumentRepository ksDocumentRepository;
|
private KSDocumentRepository ksDocumentRepository;
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private KSTextsRepository ksTextsRepository;
|
private KSTextsRepository ksTextsRepository;
|
||||||
@Autowired
|
|
||||||
private KSIngestionInfoRepository ksIngestionInfoRepository;
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private FileSystemStorageService storageService;
|
private FileSystemStorageService storageService;
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private VectorStore vectorStore;
|
private VectorStore vectorStore;
|
||||||
|
|
||||||
|
|
||||||
Logger logger = LoggerFactory.getLogger(KSIngestor.class);
|
Logger logger = LoggerFactory.getLogger(KSIngestor.class);
|
||||||
|
|
||||||
public void deleteAll(String document_file_name) {
|
public void deleteAll(String document_file_name) {
|
||||||
@@ -48,49 +50,11 @@ public class KSIngestor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public IngestionOutput ingestLoop() {
|
public IngestionOutput ingestLoop() {
|
||||||
|
|
||||||
IngestionOutput ingestionLoopOutput = new IngestionOutput();
|
IngestionOutput ingestionLoopOutput = new IngestionOutput();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
ksDocumentRepository.findAllByIngestionStatus("NEW").forEach(ksDocument -> {
|
ksDocumentRepository.findAllByIngestionStatus("NEW").forEach(ksDocument -> {
|
||||||
logger.info("Processing document: " + ksDocument.getFilePath());
|
ingestDocument(ksDocument);
|
||||||
// ingest the document
|
|
||||||
ksDocument.setIngestionStatus("IN PROGRESS");
|
|
||||||
ksDocumentRepository.save(ksDocument);
|
|
||||||
|
|
||||||
Resource file = storageService.loadAsResource(ksDocument.getFilePath());
|
|
||||||
TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(file);
|
|
||||||
|
|
||||||
List<Document> docs = tikaDocumentReader.read();
|
|
||||||
logger.info("Ingested document: " + ksDocument.getFilePath());
|
|
||||||
logger.info("Number of documents: " + docs.size());
|
|
||||||
|
|
||||||
KSIngestionInfo ingestionInfo = ksDocument.getIngestionInfo();
|
|
||||||
|
|
||||||
|
|
||||||
TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(),
|
|
||||||
ingestionInfo.getMinChunkSize(),
|
|
||||||
ingestionInfo.getMinChunkSizeToEmbed(),
|
|
||||||
ingestionInfo.getMaxNumberOfChunks(),
|
|
||||||
true);
|
|
||||||
|
|
||||||
|
|
||||||
docs.forEach(doc -> {
|
|
||||||
List<Document> splitDocs = splitter.split(doc);
|
|
||||||
|
|
||||||
logger.info("Number of documents: " + splitDocs.size());
|
|
||||||
for (Document splitDoc : splitDocs) {
|
|
||||||
logger.info("Split before put document metadata: " + splitDoc.getMetadata());
|
|
||||||
splitDoc.getMetadata().putAll(getMetadata(ingestionInfo));
|
|
||||||
logger.info("Split after put document metadata: " + splitDoc.getMetadata());
|
|
||||||
}
|
|
||||||
embedDocuments(splitDocs, ingestionInfo);
|
|
||||||
});
|
|
||||||
ksDocument.setIngestionStatus("INGESTED");//we have to set to DONE
|
|
||||||
ksDocument.setIngestionDate(new Date());
|
|
||||||
ksDocument.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date()));
|
|
||||||
|
|
||||||
ksDocumentRepository.save(ksDocument);
|
|
||||||
|
|
||||||
ingestionLoopOutput.getIngestedDocumentId().add(ksDocument.getId());
|
ingestionLoopOutput.getIngestedDocumentId().add(ksDocument.getId());
|
||||||
});
|
});
|
||||||
ingestionLoopOutput.setStatus("OK");
|
ingestionLoopOutput.setStatus("OK");
|
||||||
@@ -139,12 +103,15 @@ public class KSIngestor {
|
|||||||
ingestionInfo.getMaxNumberOfChunks(),
|
ingestionInfo.getMaxNumberOfChunks(),
|
||||||
true);
|
true);
|
||||||
|
|
||||||
|
HashMap<String, String> metadata = ingestionInfo.getMetadata();
|
||||||
|
metadata.put("KsDocumentId",ksDocument.getId());
|
||||||
|
|
||||||
docs.forEach(doc -> {
|
docs.forEach(doc -> {
|
||||||
List<Document> splitDocs = splitter.split(doc);
|
List<Document> splitDocs = splitter.split(doc);
|
||||||
|
|
||||||
logger.info("Number of documents: " + splitDocs.size());
|
logger.info("Number of documents: " + splitDocs.size());
|
||||||
for (Document splitDoc : splitDocs) {
|
for (Document splitDoc : splitDocs) {
|
||||||
splitDoc.getMetadata().putAll(getMetadata(ingestionInfo));
|
splitDoc.getMetadata().putAll(metadata);
|
||||||
}
|
}
|
||||||
embedDocuments(splitDocs, ingestionInfo);
|
embedDocuments(splitDocs, ingestionInfo);
|
||||||
});
|
});
|
||||||
@@ -251,11 +218,13 @@ public class KSIngestor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public List<String> testSimilaritySearch(String query,String filterQuery) {
|
public List<String> testSimilaritySearch(String query,String filterQuery) {
|
||||||
List<Document> docs = vectorStore.similaritySearch(
|
SearchRequest searchRequest = SearchRequest.defaults().withQuery(query).withTopK(5).withSimilarityThreshold(0.1);
|
||||||
SearchRequest.defaults()
|
|
||||||
.withQuery(query)
|
if(filterQuery!=null && !filterQuery.isEmpty()){
|
||||||
.withTopK(5).withSimilarityThreshold(0.8)
|
searchRequest.withFilterExpression(filterQuery);
|
||||||
.withFilterExpression(filterQuery));
|
}
|
||||||
|
|
||||||
|
List<Document> docs = vectorStore.similaritySearch(searchRequest);
|
||||||
|
|
||||||
List<String> result = new ArrayList<String>();
|
List<String> result = new ArrayList<String>();
|
||||||
for (Document doc : docs) {
|
for (Document doc : docs) {
|
||||||
@@ -265,19 +234,4 @@ public class KSIngestor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private HashMap<String, String> getMetadata(KSIngestionInfo ingestionInfo) {
|
|
||||||
|
|
||||||
return ingestionInfo.getMetadata();
|
|
||||||
|
|
||||||
/* HashMap<String, String> metadata = new HashMap<String, String>();
|
|
||||||
|
|
||||||
for (String meta : metadatas) {
|
|
||||||
String[] keyValue = meta.split(":");
|
|
||||||
metadata.put(keyValue[0], keyValue[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return metadata;*/
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -10,20 +10,23 @@ spring:
|
|||||||
application:
|
application:
|
||||||
name: apollo
|
name: apollo
|
||||||
ai:
|
ai:
|
||||||
|
azure:
|
||||||
|
openai:
|
||||||
|
endpoint: "https://ai-olympus.openai.azure.com/"
|
||||||
|
api-key: "9fb33cc69d914d4c8225b974876510b5"
|
||||||
openai:
|
openai:
|
||||||
api-key:
|
api-key: "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||||
vectorstore:
|
vectorstore:
|
||||||
mongodb:
|
azure:
|
||||||
uri:
|
api-key: "jxKqZvbMKuo1MwXs8ilEAeRDeswtoTXO1lWX600jP2AzSeDXo1nq"
|
||||||
indexName: vector_index
|
url: "https://search-olympus.search.windows.net"
|
||||||
collection-name: vector_store
|
initialize-schema: true
|
||||||
initialize-schema: false
|
|
||||||
data:
|
data:
|
||||||
mongodb:
|
mongodb:
|
||||||
uri:
|
uri: mongodb+srv://olympus_adm:26111979@olympus.l6qor4p.mongodb.net/?retryWrites=true&w=majority&appName=Olympus
|
||||||
database:
|
database: olympus
|
||||||
username:
|
username: olympus_adm
|
||||||
password:
|
password: 26111979
|
||||||
servlet:
|
servlet:
|
||||||
multipart:
|
multipart:
|
||||||
max-file-size: 5000000MB
|
max-file-size: 5000000MB
|
||||||
@@ -34,8 +37,14 @@ ingestion:
|
|||||||
repository:
|
repository:
|
||||||
basepath: C:\\Users\\andrea.terzani\\dev\\Olympus
|
basepath: C:\\Users\\andrea.terzani\\dev\\Olympus
|
||||||
gitlab:
|
gitlab:
|
||||||
token:
|
token: "xxxxxxxx"
|
||||||
path: /mnt/apollo_storage/repository #C:\\repos\\olympus_ai\\gitClone
|
path: /mnt/apollo_storage/repository #C:\\repos\\olympus_ai\\gitClone
|
||||||
|
cloud:
|
||||||
|
url: "https://gi2tlab.com/api/v4"
|
||||||
|
token: "xxxxxxxx"
|
||||||
|
onpremises:
|
||||||
|
url: "http://localhost:8081/api"
|
||||||
|
token: "xxxxxxxx"
|
||||||
|
|
||||||
eureka:
|
eureka:
|
||||||
client:
|
client:
|
||||||
|
|||||||
Reference in New Issue
Block a user