From 59ee7672c98475ae0c14f014d46ec9636f47fe37 Mon Sep 17 00:00:00 2001 From: "andrea.terzani" Date: Mon, 29 Jul 2024 12:41:29 +0200 Subject: [PATCH] moved to mongo --- README.md | 33 ++++++++++ pom.xml | 10 +-- .../apollo/controllers/KSFileController.java | 23 +++++-- .../com/olympus/apollo/models/KSDocument.java | 65 ++++--------------- .../apollo/models/KSIngestionInfo.java | 21 +++--- .../repository/KSDocumentRepository.java | 3 +- .../repository/KSIngestionInfoRepository.java | 2 +- .../olympus/apollo/services/KSIngestor.java | 17 +++-- .../apollo/services/StorageProperties.java | 2 +- src/main/resources/application.properties | 25 +++---- 10 files changed, 99 insertions(+), 102 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..039b1f5 --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ + + +# Vector search index +In a non-dedicated instance the vectorSearch index must be created manually from the Mongo Atlas interface. + +Create a new Search index called vector_index with the following definition + +{ + "fields": [ + { + "numDimensions": 1536, + "path": "embedding", + "similarity": "cosine", + "type": "vector" + }, + { + "path": "metadata.KsApplicatioName", + "type": "filter" + }, + { + "path": "metadata.KsDoSource", + "type": "filter" + }, + { + "path": "metadata.Source", + "type": "filter" + }, + { + "path": "metadata.KsDoctype", + "type": "filter" + } + ] +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index ab5d6a6..c85f318 100644 --- a/pom.xml +++ b/pom.xml @@ -30,19 +30,15 @@ 21 - - org.springframework.boot - spring-boot-starter-data-jpa - + org.springframework.boot spring-boot-starter-web - org.postgresql - postgresql - runtime + org.springframework.boot + spring-boot-starter-data-mongodb org.springframework.boot diff --git a/src/main/java/com/olympus/apollo/controllers/KSFileController.java b/src/main/java/com/olympus/apollo/controllers/KSFileController.java index e512c55..d7ca220 100644 --- 
a/src/main/java/com/olympus/apollo/controllers/KSFileController.java +++ b/src/main/java/com/olympus/apollo/controllers/KSFileController.java @@ -1,5 +1,9 @@ package com.olympus.apollo.controllers; +import java.util.HashMap; +import java.util.Map; + +import org.codelibs.jhighlight.fastutil.Hash; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; @@ -39,19 +43,24 @@ public class KSFileController { KSIngestionInfo ksIngestionInfo = new KSIngestionInfo(); ksIngestionInfo.setType("MD_DOCUMENT"); //TODO: This should be dynamic - ksIngestionInfo.setVdbIndex("atf_documentation"); - ksIngestionInfo.setMetadata(filePath); - ksIngestionInfoRepository.save(ksIngestionInfo); + + HashMap metadata = new HashMap<>(); + + metadata.put("KsApplicatioName","atf"); + metadata.put("KsDoctype","documentation"); + metadata.put("KsDoSource","wiki"); + metadata.put("Source",file.getOriginalFilename()); + + ksIngestionInfo.setMetadata(metadata); ksIngestionInfo.setDefaultChunkSize(1000); ksIngestionInfo.setMinChunkSize(200); ksIngestionInfo.setMaxNumberOfChunks(1000); ksIngestionInfo.setMinChunkSizeToEmbed(20); - + + ksIngestionInfoRepository.save(ksIngestionInfo); ksDocument.setIngestionInfo(ksIngestionInfo); - - - ksDocumentREpository.save(ksDocument); + return "OK"; } diff --git a/src/main/java/com/olympus/apollo/models/KSDocument.java b/src/main/java/com/olympus/apollo/models/KSDocument.java index 8073554..82bad74 100644 --- a/src/main/java/com/olympus/apollo/models/KSDocument.java +++ b/src/main/java/com/olympus/apollo/models/KSDocument.java @@ -1,19 +1,19 @@ package com.olympus.apollo.models; -import jakarta.persistence.Entity; -import jakarta.persistence.GeneratedValue; -import jakarta.persistence.Id; -import jakarta.persistence.OneToOne; +import org.springframework.data.mongodb.core.mapping.Document; + +import java.util.Date; + +import org.springframework.data.annotation.Id; 
import lombok.Getter; import lombok.Setter; -@Entity +@Document(collection = "ksdocuments") @Getter @Setter public class KSDocument { - @GeneratedValue - private @Id Long id; + private @Id String id; private String name; private String description; @@ -21,54 +21,13 @@ public class KSDocument { private String filePath; private String fileName; - @OneToOne - private KSIngestionInfo ingestionInfo; private String ingestionStatus; + private String ingestionMessage; + private Date ingestionDate; - public Long getId() { - return id; - } - public void setId(Long id) { - this.id = id; - } - public String getName() { - return name; - } - public void setName(String name) { - this.name = name; - } - public String getDescription() { - return description; - } - public void setDescription(String description) { - this.description = description; - } - - public String getFilePath() { - return filePath; - } - public void setFilePath(String filePath) { - this.filePath = filePath; - } - public String getFileName() { - return fileName; - } - public void setFileName(String fileName) { - this.fileName = fileName; - } - public KSIngestionInfo getIngestionInfo() { - return ingestionInfo; - } - public void setIngestionInfo(KSIngestionInfo ingestionInfo) { - this.ingestionInfo = ingestionInfo; - } - - public String getIngestionStatus() { - return ingestionStatus; - } - public void setIngestionStatus(String ingestionStatus) { - this.ingestionStatus = ingestionStatus; - } + private KSIngestionInfo ingestionInfo; + + } diff --git a/src/main/java/com/olympus/apollo/models/KSIngestionInfo.java b/src/main/java/com/olympus/apollo/models/KSIngestionInfo.java index ec81149..c985534 100644 --- a/src/main/java/com/olympus/apollo/models/KSIngestionInfo.java +++ b/src/main/java/com/olympus/apollo/models/KSIngestionInfo.java @@ -1,31 +1,30 @@ package com.olympus.apollo.models; -import jakarta.persistence.Entity; -import jakarta.persistence.GeneratedValue; -import jakarta.persistence.Id; +import 
java.util.HashMap; + +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; + + import lombok.Getter; import lombok.Setter; -@Entity +@Document(collection = "ksingestioninfo") @Getter @Setter public class KSIngestionInfo { - @GeneratedValue - private @Id Long id; + private @Id String id; - private String ingestionMessage; - private String ingestionDate; - - private String vdbIndex; private String type; - private String metadata; + private HashMap metadata; private int minChunkSizeToEmbed; private int maxNumberOfChunks; private int minChunkSize; private int defaultChunkSize; + } diff --git a/src/main/java/com/olympus/apollo/repository/KSDocumentRepository.java b/src/main/java/com/olympus/apollo/repository/KSDocumentRepository.java index a6ef6e6..4a14147 100644 --- a/src/main/java/com/olympus/apollo/repository/KSDocumentRepository.java +++ b/src/main/java/com/olympus/apollo/repository/KSDocumentRepository.java @@ -1,13 +1,12 @@ package com.olympus.apollo.repository; -import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.repository.CrudRepository; import org.springframework.stereotype.Repository; import com.olympus.apollo.models.KSDocument; @Repository -public interface KSDocumentRepository extends CrudRepository { +public interface KSDocumentRepository extends CrudRepository { public Iterable findAllByIngestionStatus(String status); } diff --git a/src/main/java/com/olympus/apollo/repository/KSIngestionInfoRepository.java b/src/main/java/com/olympus/apollo/repository/KSIngestionInfoRepository.java index da76c80..0052b90 100644 --- a/src/main/java/com/olympus/apollo/repository/KSIngestionInfoRepository.java +++ b/src/main/java/com/olympus/apollo/repository/KSIngestionInfoRepository.java @@ -6,6 +6,6 @@ import org.springframework.stereotype.Repository; import com.olympus.apollo.models.KSIngestionInfo; @Repository -public interface KSIngestionInfoRepository extends 
CrudRepository { +public interface KSIngestionInfoRepository extends CrudRepository { } diff --git a/src/main/java/com/olympus/apollo/services/KSIngestor.java b/src/main/java/com/olympus/apollo/services/KSIngestor.java index 17d33eb..618775e 100644 --- a/src/main/java/com/olympus/apollo/services/KSIngestor.java +++ b/src/main/java/com/olympus/apollo/services/KSIngestor.java @@ -1,6 +1,7 @@ package com.olympus.apollo.services; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.List; @@ -19,6 +20,7 @@ import org.springframework.stereotype.Service; import com.olympus.apollo.models.KSIngestionInfo; import com.olympus.apollo.repository.KSDocumentRepository; +import com.olympus.apollo.repository.KSIngestionInfoRepository; @Service @@ -27,6 +29,8 @@ public class KSIngestor { @Autowired private KSDocumentRepository ksDocumentRepository; @Autowired + private KSIngestionInfoRepository ksIngestionInfoRepository; + @Autowired private FileSystemStorageService storageService; @Autowired @@ -77,8 +81,9 @@ public class KSIngestor { } embedDocuments(splitDocs, ingestionInfo); }); - - ksDocument.setIngestionStatus("NEW"); + ksDocument.setIngestionStatus("NEW"); + ksDocument.setIngestionDate(new Date()); + ksDocumentRepository.save(ksDocument); }); @@ -111,15 +116,17 @@ public class KSIngestor { private HashMap getMetadata(KSIngestionInfo ingestionInfo) { - HashMap metadata = new HashMap(); - String[] metadatas = ingestionInfo.getMetadata().split(";"); + return ingestionInfo.getMetadata(); + + /* HashMap metadata = new HashMap(); + for (String meta : metadatas) { String[] keyValue = meta.split(":"); metadata.put(keyValue[0], keyValue[1]); } - return metadata; + return metadata;*/ } diff --git a/src/main/java/com/olympus/apollo/services/StorageProperties.java b/src/main/java/com/olympus/apollo/services/StorageProperties.java index 61affcf..fab5fb6 100644 --- a/src/main/java/com/olympus/apollo/services/StorageProperties.java +++ 
b/src/main/java/com/olympus/apollo/services/StorageProperties.java @@ -7,7 +7,7 @@ public class StorageProperties { /** * Folder location for storing files */ - private String location = "/Users/andreaterzani/Desktop/dev/olympus/upload-dir"; + private String location = "C:\\Users\\andrea.terzani\\dev\\olympus\\upload-dir"; public String getLocation() { return location; diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 9e66940..d7eda1e 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,30 +1,25 @@ spring.application.name=apollo +server.port=8082 +#spring.jpa.show-sql=true +#spring.jpa.hibernate.ddl-auto=update +#spring.datasource.url=jdbc:postgresql://localhost:5432/olympus +#spring.datasource.username=andreaterzani +#spring.datasource.password=26111979 +#spring.datasource.driver-class-name=org.postgresql.Driver +#spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.PostgreSQLDialect -spring.jpa.show-sql=true -spring.jpa.hibernate.ddl-auto=update -spring.datasource.url=jdbc:postgresql://localhost:5432/olympus -spring.datasource.username=andreaterzani -spring.datasource.password=26111979 -spring.datasource.driver-class-name=org.postgresql.Driver -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.PostgreSQLDialect spring.ai.vectorstore.mongodb.uri=mongodb+srv://olympus_adm:26111979@olympus.l6qor4p.mongodb.net/?retryWrites=true&w=majority&appName=Olympus -spring.ai.vectorstore.mongodb.initialize-schema=false -spring.ai.vectorstore.mongodb.database=olympus -spring.ai.vectorstore.mongodb.username=olympus_adm -spring.ai.vectorstore.mongodb.password=26111979 - spring.data.mongodb.uri=mongodb+srv://olympus_adm:26111979@olympus.l6qor4p.mongodb.net/?retryWrites=true&w=majority&appName=Olympus spring.data.mongodb.database=olympus spring.data.mongodb.username=olympus_adm -spring.data.mongodb.password=XXXXXX +spring.data.mongodb.password=26111979 
spring.ai.vectorstore.mongodb.indexName=vector_index spring.ai.vectorstore.mongodb.collection-name=vector_store spring.ai.vectorstore.mongodb.initialize-schema=false - # API key if needed, e.g. OpenAI -spring.ai.openai.api-key=sk-proj-k4jrXXXUYQN8yQG2vNmWT3BlbkFJ0Ge9EfKcrMxduVFQZlyO +spring.ai.openai.api-key=sk-proj-j3TFJ0h348DIzMrYYfyUT3BlbkFJjk4HMc8A2ux2Asg8Y7H1