moved to mongo

This commit is contained in:
andrea.terzani
2024-07-29 12:41:29 +02:00
parent f93b20293c
commit 59ee7672c9
10 changed files with 99 additions and 102 deletions

33
README.md Normal file
View File

@@ -0,0 +1,33 @@
# Vector search index
In a non-dedicated instance, the vectorSearch index must be created manually from the MongoDB Atlas interface.
Create a new Search index called vector_index with the following definition:
{
"fields": [
{
"numDimensions": 1536,
"path": "embedding",
"similarity": "cosine",
"type": "vector"
},
{
"path": "metadata.KsApplicatioName",
"type": "filter"
},
{
"path": "metadata.KsDoSource",
"type": "filter"
},
{
"path": "metadata.Source",
"type": "filter"
},
{
"path": "metadata.KsDoctype",
"type": "filter"
}
]
}

10
pom.xml
View File

@@ -30,19 +30,15 @@
<java.version>21</java.version> <java.version>21</java.version>
</properties> </properties>
<dependencies> <dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId> <artifactId>spring-boot-starter-web</artifactId>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.postgresql</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>postgresql</artifactId> <artifactId>spring-boot-starter-data-mongodb</artifactId>
<scope>runtime</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>

View File

@@ -1,5 +1,9 @@
package com.olympus.apollo.controllers; package com.olympus.apollo.controllers;
import java.util.HashMap;
import java.util.Map;
import org.codelibs.jhighlight.fastutil.Hash;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity; import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller; import org.springframework.stereotype.Controller;
@@ -39,19 +43,24 @@ public class KSFileController {
KSIngestionInfo ksIngestionInfo = new KSIngestionInfo(); KSIngestionInfo ksIngestionInfo = new KSIngestionInfo();
ksIngestionInfo.setType("MD_DOCUMENT"); //TODO: This should be dynamic ksIngestionInfo.setType("MD_DOCUMENT"); //TODO: This should be dynamic
ksIngestionInfo.setVdbIndex("atf_documentation");
ksIngestionInfo.setMetadata(filePath); HashMap<String, String> metadata = new HashMap<>();
ksIngestionInfoRepository.save(ksIngestionInfo);
metadata.put("KsApplicatioName","atf");
metadata.put("KsDoctype","documentation");
metadata.put("KsDoSource","wiki");
metadata.put("Source",file.getOriginalFilename());
ksIngestionInfo.setMetadata(metadata);
ksIngestionInfo.setDefaultChunkSize(1000); ksIngestionInfo.setDefaultChunkSize(1000);
ksIngestionInfo.setMinChunkSize(200); ksIngestionInfo.setMinChunkSize(200);
ksIngestionInfo.setMaxNumberOfChunks(1000); ksIngestionInfo.setMaxNumberOfChunks(1000);
ksIngestionInfo.setMinChunkSizeToEmbed(20); ksIngestionInfo.setMinChunkSizeToEmbed(20);
ksIngestionInfoRepository.save(ksIngestionInfo);
ksDocument.setIngestionInfo(ksIngestionInfo); ksDocument.setIngestionInfo(ksIngestionInfo);
ksDocumentREpository.save(ksDocument); ksDocumentREpository.save(ksDocument);
return "OK"; return "OK";
} }

View File

@@ -1,19 +1,19 @@
package com.olympus.apollo.models; package com.olympus.apollo.models;
import jakarta.persistence.Entity; import org.springframework.data.mongodb.core.mapping.Document;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.Id; import java.util.Date;
import jakarta.persistence.OneToOne;
import org.springframework.data.annotation.Id;
import lombok.Getter; import lombok.Getter;
import lombok.Setter; import lombok.Setter;
@Entity @Document(collection = "ksdocuments")
@Getter @Setter @Getter @Setter
public class KSDocument { public class KSDocument {
@GeneratedValue private @Id String id;
private @Id Long id;
private String name; private String name;
private String description; private String description;
@@ -21,54 +21,13 @@ public class KSDocument {
private String filePath; private String filePath;
private String fileName; private String fileName;
@OneToOne
private KSIngestionInfo ingestionInfo;
private String ingestionStatus; private String ingestionStatus;
private String ingestionMessage;
private Date ingestionDate;
public Long getId() { private KSIngestionInfo ingestionInfo;
return id;
}
public void setId(Long id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
public String getFilePath() {
return filePath;
}
public void setFilePath(String filePath) {
this.filePath = filePath;
}
public String getFileName() {
return fileName;
}
public void setFileName(String fileName) {
this.fileName = fileName;
}
public KSIngestionInfo getIngestionInfo() {
return ingestionInfo;
}
public void setIngestionInfo(KSIngestionInfo ingestionInfo) {
this.ingestionInfo = ingestionInfo;
}
public String getIngestionStatus() {
return ingestionStatus;
}
public void setIngestionStatus(String ingestionStatus) {
this.ingestionStatus = ingestionStatus;
}
} }

View File

@@ -1,31 +1,30 @@
package com.olympus.apollo.models; package com.olympus.apollo.models;
import jakarta.persistence.Entity; import java.util.HashMap;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.Id; import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import lombok.Getter; import lombok.Getter;
import lombok.Setter; import lombok.Setter;
@Entity @Document(collection = "ksingestioninfo")
@Getter @Setter @Getter @Setter
public class KSIngestionInfo { public class KSIngestionInfo {
@GeneratedValue private @Id String id;
private @Id Long id;
private String ingestionMessage;
private String ingestionDate;
private String vdbIndex;
private String type; private String type;
private String metadata; private HashMap<String,String> metadata;
private int minChunkSizeToEmbed; private int minChunkSizeToEmbed;
private int maxNumberOfChunks; private int maxNumberOfChunks;
private int minChunkSize; private int minChunkSize;
private int defaultChunkSize; private int defaultChunkSize;
} }

View File

@@ -1,13 +1,12 @@
package com.olympus.apollo.repository; package com.olympus.apollo.repository;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.repository.CrudRepository; import org.springframework.data.repository.CrudRepository;
import org.springframework.stereotype.Repository; import org.springframework.stereotype.Repository;
import com.olympus.apollo.models.KSDocument; import com.olympus.apollo.models.KSDocument;
@Repository @Repository
public interface KSDocumentRepository extends CrudRepository<KSDocument, Long> { public interface KSDocumentRepository extends CrudRepository<KSDocument, String> {
public Iterable<KSDocument> findAllByIngestionStatus(String status); public Iterable<KSDocument> findAllByIngestionStatus(String status);
} }

View File

@@ -6,6 +6,6 @@ import org.springframework.stereotype.Repository;
import com.olympus.apollo.models.KSIngestionInfo; import com.olympus.apollo.models.KSIngestionInfo;
@Repository @Repository
public interface KSIngestionInfoRepository extends CrudRepository<KSIngestionInfo, Long> { public interface KSIngestionInfoRepository extends CrudRepository<KSIngestionInfo, String> {
} }

View File

@@ -1,6 +1,7 @@
package com.olympus.apollo.services; package com.olympus.apollo.services;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
@@ -19,6 +20,7 @@ import org.springframework.stereotype.Service;
import com.olympus.apollo.models.KSIngestionInfo; import com.olympus.apollo.models.KSIngestionInfo;
import com.olympus.apollo.repository.KSDocumentRepository; import com.olympus.apollo.repository.KSDocumentRepository;
import com.olympus.apollo.repository.KSIngestionInfoRepository;
@Service @Service
@@ -27,6 +29,8 @@ public class KSIngestor {
@Autowired @Autowired
private KSDocumentRepository ksDocumentRepository; private KSDocumentRepository ksDocumentRepository;
@Autowired @Autowired
private KSIngestionInfoRepository ksIngestionInfoRepository;
@Autowired
private FileSystemStorageService storageService; private FileSystemStorageService storageService;
@Autowired @Autowired
@@ -77,8 +81,9 @@ public class KSIngestor {
} }
embedDocuments(splitDocs, ingestionInfo); embedDocuments(splitDocs, ingestionInfo);
}); });
ksDocument.setIngestionStatus("NEW");
ksDocument.setIngestionStatus("NEW"); ksDocument.setIngestionDate(new Date());
ksDocumentRepository.save(ksDocument); ksDocumentRepository.save(ksDocument);
}); });
@@ -111,15 +116,17 @@ public class KSIngestor {
private HashMap<String, String> getMetadata(KSIngestionInfo ingestionInfo) { private HashMap<String, String> getMetadata(KSIngestionInfo ingestionInfo) {
HashMap<String, String> metadata = new HashMap<String, String>();
String[] metadatas = ingestionInfo.getMetadata().split(";"); return ingestionInfo.getMetadata();
/* HashMap<String, String> metadata = new HashMap<String, String>();
for (String meta : metadatas) { for (String meta : metadatas) {
String[] keyValue = meta.split(":"); String[] keyValue = meta.split(":");
metadata.put(keyValue[0], keyValue[1]); metadata.put(keyValue[0], keyValue[1]);
} }
return metadata; return metadata;*/
} }

View File

@@ -7,7 +7,7 @@ public class StorageProperties {
/** /**
* Folder location for storing files * Folder location for storing files
*/ */
private String location = "/Users/andreaterzani/Desktop/dev/olympus/upload-dir"; private String location = "C:\\Users\\andrea.terzani\\dev\\olympus\\upload-dir";
public String getLocation() { public String getLocation() {
return location; return location;

View File

@@ -1,30 +1,25 @@
spring.application.name=apollo spring.application.name=apollo
server.port=8082
#spring.jpa.show-sql=true
#spring.jpa.hibernate.ddl-auto=update
#spring.datasource.url=jdbc:postgresql://localhost:5432/olympus
#spring.datasource.username=andreaterzani
#spring.datasource.password=26111979
#spring.datasource.driver-class-name=org.postgresql.Driver
#spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.PostgreSQLDialect
spring.jpa.show-sql=true
spring.jpa.hibernate.ddl-auto=update
spring.datasource.url=jdbc:postgresql://localhost:5432/olympus
spring.datasource.username=andreaterzani
spring.datasource.password=26111979
spring.datasource.driver-class-name=org.postgresql.Driver
spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.PostgreSQLDialect
spring.ai.vectorstore.mongodb.uri=mongodb+srv://olympus_adm:26111979@olympus.l6qor4p.mongodb.net/?retryWrites=true&w=majority&appName=Olympus spring.ai.vectorstore.mongodb.uri=mongodb+srv://olympus_adm:26111979@olympus.l6qor4p.mongodb.net/?retryWrites=true&w=majority&appName=Olympus
spring.ai.vectorstore.mongodb.initialize-schema=false
spring.ai.vectorstore.mongodb.database=olympus
spring.ai.vectorstore.mongodb.username=olympus_adm
spring.ai.vectorstore.mongodb.password=26111979
spring.data.mongodb.uri=mongodb+srv://olympus_adm:26111979@olympus.l6qor4p.mongodb.net/?retryWrites=true&w=majority&appName=Olympus spring.data.mongodb.uri=mongodb+srv://olympus_adm:26111979@olympus.l6qor4p.mongodb.net/?retryWrites=true&w=majority&appName=Olympus
spring.data.mongodb.database=olympus spring.data.mongodb.database=olympus
spring.data.mongodb.username=olympus_adm spring.data.mongodb.username=olympus_adm
spring.data.mongodb.password=XXXXXX spring.data.mongodb.password=26111979
spring.ai.vectorstore.mongodb.indexName=vector_index spring.ai.vectorstore.mongodb.indexName=vector_index
spring.ai.vectorstore.mongodb.collection-name=vector_store spring.ai.vectorstore.mongodb.collection-name=vector_store
spring.ai.vectorstore.mongodb.initialize-schema=false spring.ai.vectorstore.mongodb.initialize-schema=false
# API key if needed, e.g. OpenAI # API key if needed, e.g. OpenAI
spring.ai.openai.api-key=sk-proj-k4jrXXXUYQN8yQG2vNmWT3BlbkFJ0Ge9EfKcrMxduVFQZlyO spring.ai.openai.api-key=sk-proj-j3TFJ0h348DIzMrYYfyUT3BlbkFJjk4HMc8A2ux2Asg8Y7H1