diff --git a/pom.xml b/pom.xml index bfb1d9d..1a17aa6 100644 --- a/pom.xml +++ b/pom.xml @@ -58,7 +58,7 @@ spring-boot-starter-data-rest - + + + + org.springframework.ai + spring-ai-azure-openai-spring-boot-starter + + + + org.springframework.ai + spring-ai-azure-store + org.springframework.cloud spring-cloud-starter-netflix-eureka-client + org.springframework.cloud spring-cloud-starter-openfeign @@ -87,6 +99,12 @@ 1.0.0-SNAPSHOT + + org.springdoc + springdoc-openapi-starter-webmvc-ui + 2.5.0 + + org.projectlombok lombok diff --git a/src/main/java/com/olympus/apollo/config/EmbeddingConfig.java b/src/main/java/com/olympus/apollo/config/EmbeddingConfig.java index 75fecb4..d8a2643 100644 --- a/src/main/java/com/olympus/apollo/config/EmbeddingConfig.java +++ b/src/main/java/com/olympus/apollo/config/EmbeddingConfig.java @@ -1,10 +1,18 @@ package com.olympus.apollo.config; +import com.azure.core.credential.AzureKeyCredential; +import com.azure.search.documents.indexes.SearchIndexClient; +import com.azure.search.documents.indexes.SearchIndexClientBuilder; +import org.springframework.ai.azure.openai.AzureOpenAiEmbeddingModel; import org.springframework.ai.embedding.EmbeddingModel; -import org.springframework.ai.openai.OpenAiEmbeddingModel; +/*import org.springframework.ai.openai.OpenAiEmbeddingModel; import org.springframework.ai.openai.api.OpenAiApi; -import org.springframework.ai.vectorstore.MongoDBAtlasVectorStore; +import org.springframework.ai.vectorstore.MongoDBAtlasVectorStore;*/ + import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.ai.vectorstore.azure.AzureVectorStore; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.SpringBootConfiguration; import org.springframework.boot.autoconfigure.EnableAutoConfiguration; @@ -12,34 +20,39 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.data.mongodb.core.MongoTemplate; +import java.util.ArrayList; +import java.util.List; + @Configuration -@SpringBootConfiguration -@EnableAutoConfiguration public class EmbeddingConfig { - @Value("${spring.ai.openai.api-key}") - private String openAiKey; - @Value("${spring.data.mongodb.database}") - private String databaseName; - @Value("${spring.ai.vectorstore.mongodb.collection-name:vector_store}") - private String collectionName; - @Value("${spring.ai.vectorstore.mongodb.indexName:vector_index}") - private String indexName; - @Value("${spring.data.mongodb.uri}") - private String mongoUri; - @Value("${spring.ai.vectorstore.mongodb.initialize-schema}") - private Boolean initSchema; - // Add beans here... + + + @Value("${spring.ai.vectorstore.azure.api-key}") + private String azureKey; + @Value("${spring.ai.vectorstore.azure.url}") + private String azureEndpoint; + @Value("${spring.ai.vectorstore.azure.initialize-schema}") + private boolean initSchema; @Bean - public EmbeddingModel embeddingModel() { - return new OpenAiEmbeddingModel(new OpenAiApi(openAiKey)); + public SearchIndexClient searchIndexClient() { + return new SearchIndexClientBuilder().endpoint(azureEndpoint) + .credential(new AzureKeyCredential(azureKey)) + .buildClient(); } - @Bean - public VectorStore mongodbVectorStore(MongoTemplate mongoTemplate, EmbeddingModel embeddingModel) { - return new MongoDBAtlasVectorStore(mongoTemplate, embeddingModel, - MongoDBAtlasVectorStore.MongoDBVectorStoreConfig.builder().build(), initSchema); + public VectorStore vectorStore(SearchIndexClient searchIndexClient, @Qualifier("azureOpenAiEmbeddingModel") EmbeddingModel embeddingModel) { + List fields = new ArrayList<>(); + + fields.add(AzureVectorStore.MetadataField.text("KsApplicationName")); + fields.add(AzureVectorStore.MetadataField.text("KsProjectName")); + fields.add(AzureVectorStore.MetadataField.text("KsDoctype")); + fields.add(AzureVectorStore.MetadataField.text("KsDocSource")); + fields.add(AzureVectorStore.MetadataField.text("KsFileSource")); + fields.add(AzureVectorStore.MetadataField.text("KsDocumentId")); + + return new AzureVectorStore(searchIndexClient, embeddingModel,initSchema, fields); } } diff --git a/src/main/java/com/olympus/apollo/controllers/KSFileController.java b/src/main/java/com/olympus/apollo/controllers/KSFileController.java index 5d0ffb8..f677088 100644 --- a/src/main/java/com/olympus/apollo/controllers/KSFileController.java +++ b/src/main/java/com/olympus/apollo/controllers/KSFileController.java @@ -59,7 +59,6 @@ public class KSFileController { Date now = new Date(); ksDocument.setIngestionDate(now); - KSIngestionInfo ksIngestionInfo = new KSIngestionInfo(); ksIngestionInfo.setType(fileUploadDTO.getType()); // != null ? type : "MD_DOCUMENT" @@ -68,6 +67,7 @@ public class KSFileController { metadata.put("KsDoctype", fileUploadDTO.getKsDocType()); metadata.put("KsDocSource", fileUploadDTO.getKsDocSource()); metadata.put("KsFileSource", file.getOriginalFilename()); + metadata.put("KsProjectName", fileUploadDTO.getKsProjectName()); ksIngestionInfo.setMetadata(metadata); ksIngestionInfo.setDefaultChunkSize(fileUploadDTO.getDefaultChunkSize()); @@ -75,7 +75,7 @@ public class KSFileController { ksIngestionInfo.setMaxNumberOfChunks(fileUploadDTO.getMaxNumberOfChunks()); ksIngestionInfo.setMinChunkSizeToEmbed(fileUploadDTO.getMinChunkSizeToEmbed()); - ksIngestionInfoRepository.save(ksIngestionInfo); + //ksIngestionInfoRepository.save(ksIngestionInfo); ksDocument.setIngestionInfo(ksIngestionInfo); ksDocumentREpository.save(ksDocument); diff --git a/src/main/java/com/olympus/apollo/controllers/TestController.java b/src/main/java/com/olympus/apollo/controllers/TestController.java index fa1aba5..de83cc9 100644 --- a/src/main/java/com/olympus/apollo/controllers/TestController.java +++ b/src/main/java/com/olympus/apollo/controllers/TestController.java @@ -122,7 +122,7 @@ public class TestController { } @PostMapping("/revenginnerapplication") - public ResponseEntity RevEngApplication(@RequestBody ApolloParseRequestDTO apolloParseRequestDTO){ + public ResponseEntity RevEngApplication(@RequestBody ApolloParseRequestDTO apolloParseRequestDTO){ reModuleService.callReverseEngModules(apolloParseRequestDTO); return ResponseEntity.accepted().body("Request to reverse engineering application is being processed"); } diff --git a/src/main/java/com/olympus/apollo/security/config/CorsConfig.java b/src/main/java/com/olympus/apollo/security/config/CorsConfig.java index f4f1dc7..d903956 100644 --- a/src/main/java/com/olympus/apollo/security/config/CorsConfig.java +++ b/src/main/java/com/olympus/apollo/security/config/CorsConfig.java @@ -15,6 +15,7 @@ public class CorsConfig implements WebMvcConfigurer { public void addCorsMappings(CorsRegistry registry) { registry.addMapping("/**") .allowedOrigins(apollo_frontend_url) + .allowedOriginPatterns("**") .allowedHeaders("*") .allowedMethods("GET", "POST", "PUT", "DELETE","OPTIONS"); } diff --git a/src/main/java/com/olympus/apollo/services/DeletionService.java b/src/main/java/com/olympus/apollo/services/DeletionService.java index f61ba9b..a3522c4 100644 --- a/src/main/java/com/olympus/apollo/services/DeletionService.java +++ b/src/main/java/com/olympus/apollo/services/DeletionService.java @@ -9,12 +9,15 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.olympus.dto.DeletionRequest; -import com.olympus.model.apollo.VectorStore; +import org.springframework.ai.document.Document; +import org.springframework.ai.vectorstore.SearchRequest; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.messaging.simp.SimpMessagingTemplate; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; +import org.springframework.ai.vectorstore.VectorStore; +import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.concurrent.CompletableFuture; @@ -45,41 +48,33 @@ public class DeletionService { @Autowired private SimpMessagingTemplate simpMessagingTemplate; + @Autowired + private VectorStore vectorStore; + @Async("asyncTaskExecutor") public void deleteRecords(DeletionRequest deletionRequest) { try { - boolean KSDocumentExists = deletionRequest.getKsDocumentId() != null && !deletionRequest.getKsDocumentId().isEmpty() && ksDocumentRepository.existsById(deletionRequest.getKsDocumentId()); - boolean KSIngestionInfoExists = deletionRequest.getKsIngestionInfoId() != null && !deletionRequest.getKsIngestionInfoId().isEmpty() && ksIngestionInfoRepository.existsById(deletionRequest.getKsIngestionInfoId()); - boolean vectorStoreExists = deletionRequest.getKsApplicationName() != null && deletionRequest.getKsDocSource() != null && deletionRequest.getKsFileSource() != null && deletionRequest.getKsDoctype() != null; + + //TODO: COMPLETE REFACTOR REQUIRED TO DELETE RECORD FROM AZURE SEARCH + // NOT WORKING AT THE MOMENT + boolean KSDocumentExists = deletionRequest.getKsDocumentId() != null && + !deletionRequest.getKsDocumentId().isEmpty() && + ksDocumentRepository.existsById(deletionRequest.getKsDocumentId()); + if(KSDocumentExists){ + SearchRequest searchRequest = SearchRequest.defaults() + .withQuery("a").withTopK(1000) + .withSimilarityThreshold(0.0) + .withFilterExpression("KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'"); - List vectorStoreMetadataDetails = vectorStoreExists ? vectorStoreRepository.findDocumentVectorByMetadata(deletionRequest.getKsDoctype(), deletionRequest.getKsDocSource(), deletionRequest.getKsFileSource(), deletionRequest.getKsApplicationName()) : List.of(); + List docs = vectorStore.similaritySearch(searchRequest); + List ids = docs.stream().map(Document::getId).toList(); + vectorStore.delete(ids); - if (KSDocumentExists && KSIngestionInfoExists && !vectorStoreMetadataDetails.isEmpty()) { - if (deletionRequest.getKsDocumentId() != null && !deletionRequest.getKsDocumentId().isEmpty()) { - ksDocumentRepository.deleteById(deletionRequest.getKsDocumentId()); - logger.info("KSDocument with id {} deleted successfully.", deletionRequest.getKsDocumentId()+" "+Thread.currentThread().getName()); - } - - if (deletionRequest.getKsIngestionInfoId() != null && !deletionRequest.getKsIngestionInfoId().isEmpty()) { - ksIngestionInfoRepository.deleteById(deletionRequest.getKsIngestionInfoId()); - logger.info("KSIngestionInfo with id {} deleted successfully.", deletionRequest.getKsIngestionInfoId()+" "+Thread.currentThread().getName()); - } - - for (VectorStore store : vectorStoreMetadataDetails) { - vectorStoreRepository.deleteById(store.getId()); - logger.info("VectorStore with id {} deleted successfully.", store.getId()+" "+Thread.currentThread().getName()); - } - logger.info("All records deleted successfully."); - } else { - if (!KSDocumentExists) { - logger.warn("KSDocument with id {} does not exist.", deletionRequest.getKsDocumentId()+" "+Thread.currentThread().getName()); - } else if (!KSIngestionInfoExists) { - logger.warn("KSIngestionInfo with id {} does not exist.", deletionRequest.getKsIngestionInfoId()+" "+Thread.currentThread().getName()); - } else if (vectorStoreMetadataDetails.isEmpty()) { - logger.warn("No VectorStore Data available",Thread.currentThread().getName()); - - } + ksDocumentRepository.deleteById(deletionRequest.getKsDocumentId()); + logger.info("KSDocument with id {} deleted successfully.", deletionRequest.getKsDocumentId()); + }else{ + logger.warn("KSDocument with id {} does not exist.", deletionRequest.getKsDocumentId()); } } catch (Exception e) { logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName()); @@ -122,9 +117,9 @@ public class DeletionService { String ingestionStatus = ksGitInfo.getIngestionStatus(); logger.info("Ingestion Status is {}.", ingestionStatus); - List vectorStoreMetadataDetails = vectorStoreGitDetailsExists + List vectorStoreMetadataDetails = null; /*vectorStoreGitDetailsExists ? vectorStoreRepository.findGitVectorByMetadata(ksDoctype,ksDocSource, ksFileSource, applicationName, ksBranch) - : List.of(); + : List.of();*/ if (KSGitInfoExists && KSGitIngestionInfoExists) { deleteRecordsBasedOnIngestionStatus(ksGitInfoId,ksBranch,ingestionStatus,ksGitIngestionInfoId,vectorStoreMetadataDetails,applicationName); @@ -218,11 +213,12 @@ public class DeletionService { private void deleteVectorStores(List vectorStoreMetadataDetails, String applicationName){ if(!vectorStoreMetadataDetails.isEmpty()){ - for (VectorStore store : vectorStoreMetadataDetails) { + + /* for (VectorStore store : vectorStoreMetadataDetails) { String storeId=store.getId(); vectorStoreRepository.deleteById(storeId); logger.info("VectorStore with id {} deleted successfully.", applicationName, storeId); - } + }*/ } } @@ -233,7 +229,8 @@ public class DeletionService { try { boolean KSTextExists = ksTextsRepository.existsById(id); - List vectorStoreMetadataDetails = vectorStoreRepository.findByKsInternalMainEntityId(id); + /* + List vectorStoreMetadataDetails = vectorStoreRepository.findByKsInternalMainEntityId(id); if (KSTextExists && !vectorStoreMetadataDetails.isEmpty()) { for (VectorStore store : vectorStoreMetadataDetails) { @@ -252,7 +249,7 @@ public class DeletionService { } else if (vectorStoreMetadataDetails.isEmpty()) { logger.warn("No VectorStore Data available",Thread.currentThread().getName()); } - } + }*/ } catch (Exception e) { logger.error("An error occurred while deleting records: ", e+" "+Thread.currentThread().getName()); throw new RuntimeException("An error occurred while deleting records", e); diff --git a/src/main/java/com/olympus/apollo/services/KSIngestor.java b/src/main/java/com/olympus/apollo/services/KSIngestor.java index 2b27e93..28e92ce 100644 --- a/src/main/java/com/olympus/apollo/services/KSIngestor.java +++ b/src/main/java/com/olympus/apollo/services/KSIngestor.java @@ -28,15 +28,17 @@ public class KSIngestor { @Autowired private KSDocumentRepository ksDocumentRepository; + @Autowired private KSTextsRepository ksTextsRepository; - @Autowired - private KSIngestionInfoRepository ksIngestionInfoRepository; + @Autowired private FileSystemStorageService storageService; @Autowired private VectorStore vectorStore; + + Logger logger = LoggerFactory.getLogger(KSIngestor.class); public void deleteAll(String document_file_name) { @@ -48,49 +50,11 @@ public class KSIngestor { } public IngestionOutput ingestLoop() { - IngestionOutput ingestionLoopOutput = new IngestionOutput(); + try { ksDocumentRepository.findAllByIngestionStatus("NEW").forEach(ksDocument -> { - logger.info("Processing document: " + ksDocument.getFilePath()); - // ingest the document - ksDocument.setIngestionStatus("IN PROGRESS"); - ksDocumentRepository.save(ksDocument); - - Resource file = storageService.loadAsResource(ksDocument.getFilePath()); - TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(file); - - List docs = tikaDocumentReader.read(); - logger.info("Ingested document: " + ksDocument.getFilePath()); - logger.info("Number of documents: " + docs.size()); - - KSIngestionInfo ingestionInfo = ksDocument.getIngestionInfo(); - - - TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(), - ingestionInfo.getMinChunkSize(), - ingestionInfo.getMinChunkSizeToEmbed(), - ingestionInfo.getMaxNumberOfChunks(), - true); - - - docs.forEach(doc -> { - List splitDocs = splitter.split(doc); - - logger.info("Number of documents: " + splitDocs.size()); - for (Document splitDoc : splitDocs) { - logger.info("Split before put document metadata: " + splitDoc.getMetadata()); - splitDoc.getMetadata().putAll(getMetadata(ingestionInfo)); - logger.info("Split after put document metadata: " + splitDoc.getMetadata()); - } - embedDocuments(splitDocs, ingestionInfo); - }); - ksDocument.setIngestionStatus("INGESTED");//we have to set to DONE - ksDocument.setIngestionDate(new Date()); - ksDocument.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date())); - - ksDocumentRepository.save(ksDocument); - + ingestDocument(ksDocument); ingestionLoopOutput.getIngestedDocumentId().add(ksDocument.getId()); }); ingestionLoopOutput.setStatus("OK"); @@ -139,12 +103,15 @@ public class KSIngestor { ingestionInfo.getMaxNumberOfChunks(), true); + HashMap metadata = ingestionInfo.getMetadata(); + metadata.put("KsDocumentId",ksDocument.getId()); + docs.forEach(doc -> { List splitDocs = splitter.split(doc); logger.info("Number of documents: " + splitDocs.size()); for (Document splitDoc : splitDocs) { - splitDoc.getMetadata().putAll(getMetadata(ingestionInfo)); + splitDoc.getMetadata().putAll(metadata); } embedDocuments(splitDocs, ingestionInfo); }); @@ -251,11 +218,13 @@ public class KSIngestor { } public List testSimilaritySearch(String query,String filterQuery) { - List docs = vectorStore.similaritySearch( - SearchRequest.defaults() - .withQuery(query) - .withTopK(5).withSimilarityThreshold(0.8) - .withFilterExpression(filterQuery)); + SearchRequest searchRequest = SearchRequest.defaults().withQuery(query).withTopK(5).withSimilarityThreshold(0.1); + + if(filterQuery!=null && !filterQuery.isEmpty()){ + searchRequest.withFilterExpression(filterQuery); + } + + List docs = vectorStore.similaritySearch(searchRequest); List result = new ArrayList(); for (Document doc : docs) { @@ -265,19 +234,4 @@ public class KSIngestor { } - private HashMap getMetadata(KSIngestionInfo ingestionInfo) { - - return ingestionInfo.getMetadata(); - - /* HashMap metadata = new HashMap(); - - for (String meta : metadatas) { - String[] keyValue = meta.split(":"); - metadata.put(keyValue[0], keyValue[1]); - } - - return metadata;*/ - } - - } \ No newline at end of file diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 80d63e8..9bbf31d 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -10,20 +10,23 @@ spring: application: name: apollo ai: + azure: + openai: + endpoint: "https://ai-olympus.openai.azure.com/" + api-key: "9fb33cc69d914d4c8225b974876510b5" openai: - api-key: + api-key: "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" vectorstore: - mongodb: - uri: - indexName: vector_index - collection-name: vector_store - initialize-schema: false + azure: + api-key: "jxKqZvbMKuo1MwXs8ilEAeRDeswtoTXO1lWX600jP2AzSeDXo1nq" + url: "https://search-olympus.search.windows.net" + initialize-schema: true data: mongodb: - uri: - database: - username: - password: + uri: mongodb+srv://olympus_adm:26111979@olympus.l6qor4p.mongodb.net/?retryWrites=true&w=majority&appName=Olympus + database: olympus + username: olympus_adm + password: 26111979 servlet: multipart: max-file-size: 5000000MB @@ -34,8 +37,14 @@ ingestion: repository: basepath: C:\\Users\\andrea.terzani\\dev\\Olympus gitlab: - token: + token: "xxxxxxxx" path: /mnt/apollo_storage/repository #C:\\repos\\olympus_ai\\gitClone + cloud: + url: "https://gi2tlab.com/api/v4" + token: "xxxxxxxx" + onpremises: + url: "http://localhost:8081/api" + token: "xxxxxxxx" eureka: client: