Initial commit

This commit is contained in:
andrea.terzani
2024-07-29 08:49:58 +02:00
commit f93b20293c
21 changed files with 1254 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
package com.olympus.apollo;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import com.olympus.apollo.services.StorageProperties;
/**
 * Entry point of the Apollo Spring Boot application.
 *
 * <p>Besides the standard {@code @SpringBootApplication} setup, it enables binding of
 * {@link StorageProperties} as a configuration-properties bean.
 */
@SpringBootApplication
@EnableConfigurationProperties(StorageProperties.class)
public class ApolloApplication {

    /**
     * Starts the Spring application context.
     *
     * @param args command-line arguments, forwarded unchanged to Spring Boot
     */
    public static void main(String[] args) {
        final Class<?> primarySource = ApolloApplication.class;
        SpringApplication.run(primarySource, args);
    }
}

View File

@@ -0,0 +1,45 @@
package com.olympus.apollo.config;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.openai.OpenAiEmbeddingModel;
import org.springframework.ai.openai.api.OpenAiApi;
import org.springframework.ai.vectorstore.MongoDBAtlasVectorStore;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringBootConfiguration;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.mongodb.core.MongoTemplate;
/**
 * Spring configuration that wires the OpenAI embedding model and the MongoDB Atlas
 * vector store.
 *
 * <p>Only {@code @Configuration} is declared: {@code @SpringBootConfiguration} and
 * {@code @EnableAutoConfiguration} are already contributed by the
 * {@code @SpringBootApplication} main class, and a second {@code @SpringBootConfiguration}
 * in the application's package tree makes {@code @SpringBootTest} unable to pick a single
 * configuration class.
 */
@Configuration
public class EmbeddingConfig {

    /** OpenAI API key used to build the embedding client. */
    @Value("${spring.ai.openai.api-key}")
    private String openAiKey;

    // Injected but not read by any bean defined in this class.
    @Value("${spring.data.mongodb.database}")
    private String databaseName;

    /** Collection holding the embedded documents (defaults to {@code vector_store}). */
    @Value("${spring.ai.vectorstore.mongodb.collection-name:vector_store}")
    private String collectionName;

    /** Name of the Atlas vector search index (defaults to {@code vector_index}). */
    @Value("${spring.ai.vectorstore.mongodb.indexName:vector_index}")
    private String indexName;

    // Injected but not read by any bean defined in this class.
    @Value("${spring.data.mongodb.uri}")
    private String mongoUri;

    /** Whether the vector store should initialize its schema; off unless configured. */
    @Value("${spring.ai.vectorstore.mongodb.initialize-schema:false}")
    private Boolean initSchema;

    /**
     * Creates the embedding model backed by the OpenAI API.
     *
     * @return an {@link OpenAiEmbeddingModel} using the configured API key
     */
    @Bean
    public EmbeddingModel embeddingModel() {
        return new OpenAiEmbeddingModel(new OpenAiApi(openAiKey));
    }

    /**
     * Creates the MongoDB Atlas vector store.
     *
     * <p>The configured collection and index names are passed to the store configuration;
     * previously they were injected but the builder was left on its defaults.
     *
     * @param mongoTemplate  template used by the store to access MongoDB
     * @param embeddingModel model used to embed documents and queries
     * @return the configured {@link VectorStore}
     */
    @Bean
    public VectorStore mongodbVectorStore(MongoTemplate mongoTemplate, EmbeddingModel embeddingModel) {
        MongoDBAtlasVectorStore.MongoDBVectorStoreConfig storeConfig =
                MongoDBAtlasVectorStore.MongoDBVectorStoreConfig.builder()
                        .withCollectionName(collectionName)
                        .withVectorIndexName(indexName)
                        .build();
        return new MongoDBAtlasVectorStore(mongoTemplate, embeddingModel, storeConfig, initSchema);
    }
}

View File

@@ -0,0 +1,62 @@
package com.olympus.apollo.controllers;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.mvc.support.RedirectAttributes;
import com.olympus.apollo.models.KSDocument;
import com.olympus.apollo.models.KSIngestionInfo;
import com.olympus.apollo.repository.KSDocumentRepository;
import com.olympus.apollo.repository.KSIngestionInfoRepository;
import com.olympus.apollo.services.StorageFileNotFoundException;
import com.olympus.apollo.services.StorageService;
/**
 * MVC controller that accepts file uploads and registers them for later ingestion.
 */
@Controller
public class KSFileController {

    @Autowired
    private StorageService storageService;

    @Autowired
    private KSDocumentRepository ksDocumentRepository;

    @Autowired
    private KSIngestionInfoRepository ksIngestionInfoRepository;

    /**
     * Stores the uploaded file and persists a {@link KSDocument} in status {@code NEW}
     * together with its {@link KSIngestionInfo}.
     *
     * <p>The response is written directly to the body ({@code @ResponseBody}); without it a
     * {@code @Controller} method returning a {@code String} is resolved as a view name.
     *
     * @param file the multipart file sent under the request parameter {@code file}
     * @return the literal body {@code "OK"} once both entities have been saved
     */
    @PostMapping("/upload")
    @ResponseBody
    public String handleFileUpload(@RequestParam("file") MultipartFile file) {
        String filePath = storageService.store(file);

        KSIngestionInfo ksIngestionInfo = new KSIngestionInfo();
        ksIngestionInfo.setType("MD_DOCUMENT"); // TODO: This should be dynamic
        ksIngestionInfo.setVdbIndex("atf_documentation");
        // NOTE(review): KSIngestor#getMetadata parses this value as "key:value;key:value";
        // a bare file path does not follow that format - confirm the intended content.
        ksIngestionInfo.setMetadata(filePath);
        ksIngestionInfo.setDefaultChunkSize(1000);
        ksIngestionInfo.setMinChunkSize(200);
        ksIngestionInfo.setMaxNumberOfChunks(1000);
        ksIngestionInfo.setMinChunkSizeToEmbed(20);
        // Save only after every field is populated: the association is not cascaded, so
        // values set after this call are not guaranteed to reach the database.
        KSIngestionInfo savedIngestionInfo = ksIngestionInfoRepository.save(ksIngestionInfo);

        KSDocument ksDocument = new KSDocument();
        ksDocument.setFilePath(filePath);
        ksDocument.setFileName(file.getOriginalFilename());
        ksDocument.setName(file.getOriginalFilename());
        ksDocument.setDescription("Uploaded file");
        ksDocument.setIngestionStatus("NEW");
        ksDocument.setIngestionInfo(savedIngestionInfo);
        ksDocumentRepository.save(ksDocument);

        return "OK";
    }

    /**
     * Maps a missing stored file to an HTTP 404 response.
     *
     * @param exc the exception raised by the storage layer
     * @return an empty 404 response
     */
    @ExceptionHandler(StorageFileNotFoundException.class)
    public ResponseEntity<?> handleStorageFileNotFound(StorageFileNotFoundException exc) {
        return ResponseEntity.notFound().build();
    }
}

View File

@@ -0,0 +1,38 @@
package com.olympus.apollo.controllers;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RestController;
import com.olympus.apollo.services.KSIngestor;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
/**
 * Ad-hoc REST endpoints for manually exercising {@link KSIngestor}.
 */
@RestController
public class TestController {

    /** File name used by {@code test/delete} when no {@code query} parameter is supplied. */
    private static final String DEFAULT_DOCUMENT_FILE_NAME = "3-automated-test-framework---atf.md";

    @Autowired
    KSIngestor ksIngestor;

    /**
     * Runs one ingestion pass synchronously.
     *
     * @return a fixed confirmation message once {@link KSIngestor#ingestLoop()} returns
     */
    @GetMapping("test/ingestion_loop")
    public String testIngestionLoop() {
        ksIngestor.ingestLoop();
        return "Ingestion Loop Completed";
    }

    /**
     * Runs a similarity search restricted to the {@code documentation} document type.
     *
     * @param query free-text query to embed and search for
     * @return the contents of the matching documents
     */
    @GetMapping("test/query_vector")
    public List<String> testSimilaritySearch(@RequestParam String query) {
        return ksIngestor.testSimilaritySearch(query, "documentation");
    }

    /**
     * Forwards a document file name to {@link KSIngestor#deleteAll(String)}.
     *
     * <p>The {@code query} parameter used to be mandatory yet ignored in favour of a
     * hard-coded file name; it is now passed through, and the former constant is the
     * default when the parameter is omitted.
     *
     * @param query file name of the document whose vectors are targeted
     * @return the fixed message {@code "Deleted"}
     */
    @GetMapping("test/delete")
    public String deleteAllFromVectore(
            @RequestParam(defaultValue = DEFAULT_DOCUMENT_FILE_NAME) String query) {
        ksIngestor.deleteAll(query);
        return "Deleted";
    }
}

View File

@@ -0,0 +1,74 @@
package com.olympus.apollo.models;
import jakarta.persistence.Entity;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.Id;
import jakarta.persistence.OneToOne;
import lombok.Getter;
import lombok.Setter;
/**
 * JPA entity describing an uploaded knowledge-source document and its ingestion state.
 *
 * <p>All getters and setters are generated by the class-level Lombok {@code @Getter} /
 * {@code @Setter}, the same way {@code KSIngestionInfo} does it; the hand-written accessors
 * that duplicated them have been removed.
 */
@Entity
@Getter
@Setter
public class KSDocument {

    /** Generated primary key. */
    @Id
    @GeneratedValue
    private Long id;

    private String name;

    private String description;

    /** Location of the stored file, as returned by the storage service on upload. */
    private String filePath;

    /** Original file name supplied by the uploader. */
    private String fileName;

    /** Ingestion settings for this document; not cascaded, so it must be saved separately. */
    @OneToOne
    private KSIngestionInfo ingestionInfo;

    /** Free-form status string, e.g. {@code "NEW"} right after upload. */
    private String ingestionStatus;
}

View File

@@ -0,0 +1,31 @@
package com.olympus.apollo.models;
import jakarta.persistence.Entity;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.Id;
import lombok.Getter;
import lombok.Setter;
/**
 * JPA entity holding the ingestion settings and outcome attached to a document.
 *
 * <p>Accessors are generated by Lombok.
 */
@Entity
@Getter
@Setter
public class KSIngestionInfo {

    /** Generated primary key. */
    @Id
    @GeneratedValue
    private Long id;

    private String ingestionMessage;

    private String ingestionDate;

    private String vdbIndex;

    private String type;

    /** Raw metadata string; KSIngestor parses it as {@code key:value} pairs separated by {@code ;}. */
    private String metadata;

    // The four values below are handed to the TokenTextSplitter constructor by KSIngestor.
    private int minChunkSizeToEmbed;

    private int maxNumberOfChunks;

    private int minChunkSize;

    private int defaultChunkSize;
}

View File

@@ -0,0 +1,13 @@
package com.olympus.apollo.repository;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.repository.CrudRepository;
import org.springframework.stereotype.Repository;
import com.olympus.apollo.models.KSDocument;
/**
 * Spring Data repository for {@link KSDocument} entities, keyed by {@code Long} id.
 */
@Repository
public interface KSDocumentRepository extends CrudRepository<KSDocument, Long> {

    /**
     * Derived query returning every document whose {@code ingestionStatus} equals the
     * given value.
     *
     * @param status ingestion status to match
     * @return the matching documents
     */
    Iterable<KSDocument> findAllByIngestionStatus(String status);
}

View File

@@ -0,0 +1,11 @@
package com.olympus.apollo.repository;
import org.springframework.data.repository.CrudRepository;
import org.springframework.stereotype.Repository;
import com.olympus.apollo.models.KSIngestionInfo;
/**
 * Spring Data repository for {@link KSIngestionInfo} entities, keyed by {@code Long} id.
 *
 * <p>Declares no custom queries; only the inherited {@link CrudRepository} operations.
 */
@Repository
public interface KSIngestionInfoRepository
        extends CrudRepository<KSIngestionInfo, Long> {
}

View File

@@ -0,0 +1,114 @@
package com.olympus.apollo.services;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.stream.Stream;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.core.io.UrlResource;
import org.springframework.stereotype.Service;
import org.springframework.util.FileSystemUtils;
import org.springframework.web.multipart.MultipartFile;
/**
 * {@link StorageService} implementation that keeps uploaded files in a single directory
 * on the local file system.
 */
@Service
public class FileSystemStorageService implements StorageService {

    /** Directory that holds all stored files. */
    private final Path rootLocation;

    /**
     * Creates the service for the location configured in {@link StorageProperties}.
     *
     * @param properties storage settings; the location must be non-blank
     * @throws StorageException if the configured location is missing or blank
     */
    @Autowired
    public FileSystemStorageService(StorageProperties properties) {
        String location = properties.getLocation();
        if (location == null || location.trim().isEmpty()) {
            throw new StorageException("File upload location can not be Empty.");
        }
        this.rootLocation = Paths.get(location);
    }

    /**
     * Copies the uploaded file into the root location, replacing any existing file with
     * the same name.
     *
     * @param file the uploaded file
     * @return the absolute, normalized path of the stored file
     * @throws StorageException if the file is empty, has no name, would be written outside
     *                          the root location, or cannot be copied
     */
    @Override
    public String store(MultipartFile file) {
        try {
            if (file.isEmpty()) {
                throw new StorageException("Failed to store empty file.");
            }
            String originalFilename = file.getOriginalFilename();
            if (originalFilename == null || originalFilename.trim().isEmpty()) {
                throw new StorageException("Failed to store file without a name.");
            }

            // Normalize the root as well, so a configured location containing ".." or "."
            // segments still compares equal to the destination's parent below.
            Path absoluteRoot = this.rootLocation.toAbsolutePath().normalize();
            Path destinationFile = absoluteRoot.resolve(Paths.get(originalFilename)).normalize();
            if (!absoluteRoot.equals(destinationFile.getParent())) {
                // This is a security check
                throw new StorageException(
                        "Cannot store file outside current directory.");
            }

            // Nothing in the visible code calls init(), so make sure the directory exists.
            Files.createDirectories(absoluteRoot);
            try (InputStream inputStream = file.getInputStream()) {
                Files.copy(inputStream, destinationFile, StandardCopyOption.REPLACE_EXISTING);
            }
            return destinationFile.toString();
        }
        catch (IOException e) {
            throw new StorageException("Failed to store file.", e);
        }
    }

    /**
     * Lists the direct children of the root location as paths relative to it.
     *
     * @return a stream of relative paths; it is backed by {@link Files#walk} and should be
     *         closed by the caller
     * @throws StorageException if the directory cannot be read
     */
    @Override
    public Stream<Path> loadAll() {
        try {
            return Files.walk(this.rootLocation, 1)
                    .filter(path -> !path.equals(this.rootLocation))
                    .map(this.rootLocation::relativize);
        }
        catch (IOException e) {
            throw new StorageException("Failed to read stored files", e);
        }
    }

    /**
     * Resolves a file name against the root location.
     *
     * @param filename name or path to resolve; an absolute path is returned unchanged by
     *                 {@link Path#resolve(String)}
     * @return the resolved path (not checked for existence)
     */
    @Override
    public Path load(String filename) {
        return rootLocation.resolve(filename);
    }

    /**
     * Loads a stored file as a Spring {@link Resource}.
     *
     * @param filename name or path of the file
     * @return the resource
     * @throws StorageFileNotFoundException if the resource neither exists nor is readable,
     *                                      or its URI cannot be converted to a URL
     */
    @Override
    public Resource loadAsResource(String filename) {
        try {
            Path file = load(filename);
            Resource resource = new UrlResource(file.toUri());
            if (resource.exists() || resource.isReadable()) {
                return resource;
            }
            throw new StorageFileNotFoundException("Could not read file: " + filename);
        }
        catch (MalformedURLException e) {
            throw new StorageFileNotFoundException("Could not read file: " + filename, e);
        }
    }

    /** Recursively deletes the root location and everything in it. */
    @Override
    public void deleteAll() {
        FileSystemUtils.deleteRecursively(rootLocation.toFile());
    }

    /**
     * Creates the root location, including missing parent directories.
     *
     * @throws StorageException if the directory cannot be created
     */
    @Override
    public void init() {
        try {
            Files.createDirectories(rootLocation);
        }
        catch (IOException e) {
            throw new StorageException("Could not initialize storage", e);
        }
    }
}

View File

@@ -0,0 +1,126 @@
package com.olympus.apollo.services;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.codelibs.jhighlight.fastutil.Hash;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.KeywordMetadataEnricher;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.stereotype.Service;
import com.olympus.apollo.models.KSIngestionInfo;
import com.olympus.apollo.repository.KSDocumentRepository;
/**
 * Service that reads stored documents, splits them into chunks, and adds the chunks to the
 * vector store; it also offers helper lookups against that store.
 */
@Service
public class KSIngestor {

    private static final Logger logger = LoggerFactory.getLogger(KSIngestor.class);

    private static final String STATUS_NEW = "NEW";
    private static final String STATUS_IN_PROGRESS = "IN PROGRESS";
    // NOTE(review): no terminal status names are defined anywhere in the visible code;
    // "INGESTED" and "ERROR" are introduced here - align them with any consumer of the status.
    private static final String STATUS_INGESTED = "INGESTED";
    private static final String STATUS_ERROR = "ERROR";

    @Autowired
    private KSDocumentRepository ksDocumentRepository;

    @Autowired
    private StorageService storageService;

    @Autowired
    private VectorStore vectorStore;

    /**
     * Looks up the vector-store entries whose {@code source} metadata equals the given file
     * name and logs how many were found.
     *
     * <p>NOTE(review): despite its name this method does not remove anything yet; it only
     * performs the lookup. The file name is concatenated into the filter expression
     * unescaped, so it must not contain a single quote.
     *
     * @param document_file_name value matched against the {@code source} metadata field
     */
    public void deleteAll(String document_file_name) {
        List<Document> docToDelete = vectorStore.similaritySearch(
                SearchRequest.defaults()
                        .withQuery("*")
                        .withSimilarityThreshold(0.0)
                        .withFilterExpression("'source'=='" + document_file_name + "'"));
        logger.info("Number of documents to delete: {}", docToDelete.size());
    }

    /**
     * Ingests every document currently in status {@code NEW}.
     *
     * <p>Each document is marked {@code IN PROGRESS}, read with Tika, split with the chunk
     * settings from its {@link KSIngestionInfo}, enriched with the parsed metadata and added
     * to the vector store. On success it is marked {@code INGESTED}; previously it was reset
     * to {@code NEW}, which caused it to be ingested again on every run. On failure it is
     * marked {@code ERROR} and the exception is rethrown, so it is not left in
     * {@code IN PROGRESS}.
     */
    public void ingestLoop() {
        ksDocumentRepository.findAllByIngestionStatus(STATUS_NEW).forEach(ksDocument -> {
            ksDocument.setIngestionStatus(STATUS_IN_PROGRESS);
            ksDocumentRepository.save(ksDocument);

            try {
                Resource file = storageService.loadAsResource(ksDocument.getFilePath());
                List<Document> docs = new TikaDocumentReader(file).read();
                logger.info("Ingested document: {}", ksDocument.getFilePath());
                logger.info("Number of documents: {}", docs.size());

                KSIngestionInfo ingestionInfo = ksDocument.getIngestionInfo();
                TokenTextSplitter splitter = new TokenTextSplitter(
                        ingestionInfo.getDefaultChunkSize(),
                        ingestionInfo.getMinChunkSize(),
                        ingestionInfo.getMinChunkSizeToEmbed(),
                        ingestionInfo.getMaxNumberOfChunks(),
                        true);
                // Parsed once per document; it is the same for every chunk.
                HashMap<String, String> extraMetadata = getMetadata(ingestionInfo);

                for (Document doc : docs) {
                    List<Document> splitDocs = splitter.split(doc);
                    logger.info("Number of documents: {}", splitDocs.size());
                    for (Document splitDoc : splitDocs) {
                        splitDoc.getMetadata().putAll(extraMetadata);
                    }
                    embedDocuments(splitDocs, ingestionInfo);
                }
                ksDocument.setIngestionStatus(STATUS_INGESTED);
            }
            catch (RuntimeException e) {
                ksDocument.setIngestionStatus(STATUS_ERROR);
                logger.error("Ingestion failed for document: {}", ksDocument.getFilePath(), e);
                throw e;
            }
            finally {
                ksDocumentRepository.save(ksDocument);
            }
        });
    }

    /**
     * Adds the given chunks to the vector store.
     *
     * @param docs          chunks to add
     * @param ingestionInfo ingestion settings of the owning document (currently unused)
     */
    private void embedDocuments(List<Document> docs, KSIngestionInfo ingestionInfo) {
        logger.info("Embedding documents");
        vectorStore.add(docs);
        logger.info("Documents embedded");
    }

    /**
     * Runs a top-5 similarity search (threshold 0.8) restricted to one document type.
     *
     * @param query           free-text query
     * @param filter_doc_type value matched against the {@code ks_document_type} metadata
     *                        field; concatenated into the filter expression unescaped
     * @return the contents of the matching documents
     */
    public List<String> testSimilaritySearch(String query, String filter_doc_type) {
        List<Document> docs = vectorStore.similaritySearch(
                SearchRequest.defaults()
                        .withQuery(query)
                        .withTopK(5).withSimilarityThreshold(0.8)
                        .withFilterExpression("'ks_document_type'=='" + filter_doc_type + "'"));
        List<String> result = new ArrayList<>(docs.size());
        for (Document doc : docs) {
            result.add(doc.getContent());
        }
        return result;
    }

    /**
     * Parses the ingestion metadata string into a map.
     *
     * <p>The expected format is {@code key:value} pairs separated by {@code ;}. Only the
     * first {@code :} of a pair separates key from value, so values may contain colons.
     * Pairs without a {@code :} or with a blank key are skipped with a warning instead of
     * failing the whole ingestion; a missing or blank string yields an empty map.
     *
     * @param ingestionInfo source of the raw metadata string
     * @return the parsed key/value pairs, never {@code null}
     */
    private HashMap<String, String> getMetadata(KSIngestionInfo ingestionInfo) {
        HashMap<String, String> metadata = new HashMap<>();
        String rawMetadata = ingestionInfo.getMetadata();
        if (rawMetadata == null || rawMetadata.trim().isEmpty()) {
            return metadata;
        }
        for (String entry : rawMetadata.split(";")) {
            String[] keyValue = entry.split(":", 2);
            if (keyValue.length < 2 || keyValue[0].trim().isEmpty()) {
                logger.warn("Skipping malformed metadata entry: {}", entry);
                continue;
            }
            metadata.put(keyValue[0].trim(), keyValue[1].trim());
        }
        return metadata;
    }
}

View File

@@ -0,0 +1,11 @@
package com.olympus.apollo.services;
/**
 * Unchecked exception signalling a failure in the storage layer.
 */
public class StorageException extends RuntimeException {

    /**
     * Creates an exception without an underlying cause.
     *
     * @param message description of the failure
     */
    public StorageException(final String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps the underlying cause.
     *
     * @param message description of the failure
     * @param cause   the exception that triggered this one
     */
    public StorageException(final String message, final Throwable cause) {
        super(message, cause);
    }
}

View File

@@ -0,0 +1,12 @@
package com.olympus.apollo.services;
/**
 * {@link StorageException} raised when a requested stored file cannot be read.
 */
public class StorageFileNotFoundException extends StorageException {

    /**
     * Creates an exception without an underlying cause.
     *
     * @param message description of the failure
     */
    public StorageFileNotFoundException(final String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps the underlying cause.
     *
     * @param message description of the failure
     * @param cause   the exception that triggered this one
     */
    public StorageFileNotFoundException(final String message, final Throwable cause) {
        super(message, cause);
    }
}

View File

@@ -0,0 +1,20 @@
package com.olympus.apollo.services;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Configuration properties bound from the {@code storage.*} namespace.
 */
@ConfigurationProperties("storage")
public class StorageProperties {

    /**
     * Folder location for storing files.
     *
     * <p>Defaults to a directory relative to the working directory rather than a path
     * inside one developer's home folder; override it with {@code storage.location}.
     */
    private String location = "upload-dir";

    /**
     * @return the configured folder location
     */
    public String getLocation() {
        return location;
    }

    /**
     * @param location folder location for storing files
     */
    public void setLocation(String location) {
        this.location = location;
    }
}

View File

@@ -0,0 +1,22 @@
package com.olympus.apollo.services;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.core.io.Resource;
import java.nio.file.Path;
import java.util.stream.Stream;
/**
 * Abstraction over the place where uploaded files are kept.
 */
public interface StorageService {

    /** Prepares the underlying storage for use. */
    void init();

    /**
     * Stores an uploaded file.
     *
     * @param file the uploaded file
     * @return a string locating the stored file (the file-system implementation returns
     *         the destination path)
     */
    String store(MultipartFile file);

    /**
     * Lists the stored files.
     *
     * @return a stream of paths of the stored files
     */
    Stream<Path> loadAll();

    /**
     * Resolves a file name to a path in the storage.
     *
     * @param filename name of the file
     * @return the path for that name
     */
    Path load(String filename);

    /**
     * Loads a stored file as a Spring {@link Resource}.
     *
     * @param filename name of the file
     * @return the resource for that file
     */
    Resource loadAsResource(String filename);

    /** Removes everything held by the storage. */
    void deleteAll();
}

View File

@@ -0,0 +1,30 @@
# Credentials and API keys must NOT be committed. They are read from environment
# variables (DB_PASSWORD, MONGODB_URI, MONGODB_PASSWORD, OPENAI_API_KEY); startup fails
# if a placeholder without a default is unset. Any secret that was previously committed
# in this file should be rotated.
spring.application.name=apollo

# --- Relational datasource (PostgreSQL / JPA) ---
spring.jpa.show-sql=true
spring.jpa.hibernate.ddl-auto=update
spring.datasource.url=jdbc:postgresql://localhost:5432/olympus
spring.datasource.username=${DB_USERNAME:andreaterzani}
spring.datasource.password=${DB_PASSWORD}
spring.datasource.driver-class-name=org.postgresql.Driver
spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.PostgreSQLDialect

# --- MongoDB Atlas (vector store) ---
# MONGODB_URI is the full connection string, including credentials.
spring.ai.vectorstore.mongodb.uri=${MONGODB_URI}
spring.ai.vectorstore.mongodb.initialize-schema=false
spring.ai.vectorstore.mongodb.database=olympus
spring.ai.vectorstore.mongodb.username=${MONGODB_USERNAME:olympus_adm}
spring.ai.vectorstore.mongodb.password=${MONGODB_PASSWORD}
spring.ai.vectorstore.mongodb.indexName=vector_index
spring.ai.vectorstore.mongodb.collection-name=vector_store

# --- MongoDB (Spring Data) ---
spring.data.mongodb.uri=${MONGODB_URI}
spring.data.mongodb.database=olympus
spring.data.mongodb.username=${MONGODB_USERNAME:olympus_adm}
spring.data.mongodb.password=${MONGODB_PASSWORD}

# --- OpenAI ---
spring.ai.openai.api-key=${OPENAI_API_KEY}

View File

@@ -0,0 +1,13 @@
package com.olympus.apollo;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
@SpringBootTest
class ApolloApplicationTests {
@Test
void contextLoads() {
}
}