Initial commit
This commit is contained in:
17
src/main/java/com/olympus/apollo/ApolloApplication.java
Normal file
17
src/main/java/com/olympus/apollo/ApolloApplication.java
Normal file
@@ -0,0 +1,17 @@
|
||||
package com.olympus.apollo;
|
||||
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
|
||||
import com.olympus.apollo.services.StorageProperties;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableConfigurationProperties(StorageProperties.class)
|
||||
public class ApolloApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(ApolloApplication.class, args);
|
||||
}
|
||||
|
||||
}
|
||||
45
src/main/java/com/olympus/apollo/config/EmbeddingConfig.java
Normal file
45
src/main/java/com/olympus/apollo/config/EmbeddingConfig.java
Normal file
@@ -0,0 +1,45 @@
|
||||
package com.olympus.apollo.config;
|
||||
|
||||
import org.springframework.ai.embedding.EmbeddingModel;
|
||||
import org.springframework.ai.openai.OpenAiEmbeddingModel;
|
||||
import org.springframework.ai.openai.api.OpenAiApi;
|
||||
import org.springframework.ai.vectorstore.MongoDBAtlasVectorStore;
|
||||
import org.springframework.ai.vectorstore.VectorStore;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.SpringBootConfiguration;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.data.mongodb.core.MongoTemplate;
|
||||
|
||||
@Configuration
|
||||
@SpringBootConfiguration
|
||||
@EnableAutoConfiguration
|
||||
public class EmbeddingConfig {
|
||||
@Value("${spring.ai.openai.api-key}")
|
||||
private String openAiKey;
|
||||
@Value("${spring.data.mongodb.database}")
|
||||
private String databaseName;
|
||||
@Value("${spring.ai.vectorstore.mongodb.collection-name:vector_store}")
|
||||
private String collectionName;
|
||||
@Value("${spring.ai.vectorstore.mongodb.indexName:vector_index}")
|
||||
private String indexName;
|
||||
@Value("${spring.data.mongodb.uri}")
|
||||
private String mongoUri;
|
||||
@Value("${spring.ai.vectorstore.mongodb.initialize-schema}")
|
||||
private Boolean initSchema;
|
||||
// Add beans here...
|
||||
|
||||
@Bean
|
||||
public EmbeddingModel embeddingModel() {
|
||||
return new OpenAiEmbeddingModel(new OpenAiApi(openAiKey));
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public VectorStore mongodbVectorStore(MongoTemplate mongoTemplate, EmbeddingModel embeddingModel) {
|
||||
return new MongoDBAtlasVectorStore(mongoTemplate, embeddingModel,
|
||||
MongoDBAtlasVectorStore.MongoDBVectorStoreConfig.builder().build(), initSchema);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
package com.olympus.apollo.controllers;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.mvc.support.RedirectAttributes;
|
||||
|
||||
import com.olympus.apollo.models.KSDocument;
|
||||
import com.olympus.apollo.models.KSIngestionInfo;
|
||||
import com.olympus.apollo.repository.KSDocumentRepository;
|
||||
import com.olympus.apollo.repository.KSIngestionInfoRepository;
|
||||
import com.olympus.apollo.services.StorageFileNotFoundException;
|
||||
import com.olympus.apollo.services.StorageService;
|
||||
|
||||
@Controller
|
||||
public class KSFileController {
|
||||
@Autowired
|
||||
private StorageService storageService;
|
||||
@Autowired
|
||||
private KSDocumentRepository ksDocumentREpository;
|
||||
@Autowired
|
||||
private KSIngestionInfoRepository ksIngestionInfoRepository;
|
||||
|
||||
|
||||
@PostMapping("/upload")
|
||||
public String handleFileUpload(@RequestParam("file") MultipartFile file) {
|
||||
|
||||
String filePath = storageService.store(file);
|
||||
KSDocument ksDocument = new KSDocument();
|
||||
ksDocument.setFilePath(filePath);
|
||||
ksDocument.setFileName(file.getOriginalFilename());
|
||||
ksDocument.setName(file.getOriginalFilename());
|
||||
ksDocument.setDescription("Uploaded file");
|
||||
ksDocument.setIngestionStatus("NEW");
|
||||
|
||||
KSIngestionInfo ksIngestionInfo = new KSIngestionInfo();
|
||||
ksIngestionInfo.setType("MD_DOCUMENT"); //TODO: This should be dynamic
|
||||
ksIngestionInfo.setVdbIndex("atf_documentation");
|
||||
ksIngestionInfo.setMetadata(filePath);
|
||||
ksIngestionInfoRepository.save(ksIngestionInfo);
|
||||
ksIngestionInfo.setDefaultChunkSize(1000);
|
||||
ksIngestionInfo.setMinChunkSize(200);
|
||||
ksIngestionInfo.setMaxNumberOfChunks(1000);
|
||||
ksIngestionInfo.setMinChunkSizeToEmbed(20);
|
||||
|
||||
ksDocument.setIngestionInfo(ksIngestionInfo);
|
||||
|
||||
|
||||
|
||||
ksDocumentREpository.save(ksDocument);
|
||||
return "OK";
|
||||
}
|
||||
|
||||
@ExceptionHandler(StorageFileNotFoundException.class)
|
||||
public ResponseEntity<?> handleStorageFileNotFound(StorageFileNotFoundException exc) {
|
||||
return ResponseEntity.notFound().build();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
package com.olympus.apollo.controllers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import com.olympus.apollo.services.KSIngestor;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
|
||||
|
||||
@RestController
|
||||
public class TestController {
|
||||
|
||||
@Autowired
|
||||
KSIngestor ksIngestor;
|
||||
|
||||
|
||||
@GetMapping("test/ingestion_loop")
|
||||
public String testIngestionLoop() {
|
||||
ksIngestor.ingestLoop();
|
||||
return "Ingestion Loop Completed";
|
||||
}
|
||||
|
||||
@GetMapping("test/query_vector")
|
||||
public List<String> testSimilaritySearch(@RequestParam String query) {
|
||||
return ksIngestor.testSimilaritySearch(query,"documentation");
|
||||
}
|
||||
|
||||
@GetMapping("test/delete")
|
||||
public String deleteAllFromVectore(@RequestParam String query) {
|
||||
ksIngestor.deleteAll("3-automated-test-framework---atf.md");
|
||||
return "Deleted";
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
74
src/main/java/com/olympus/apollo/models/KSDocument.java
Normal file
74
src/main/java/com/olympus/apollo/models/KSDocument.java
Normal file
@@ -0,0 +1,74 @@
|
||||
package com.olympus.apollo.models;
|
||||
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.GeneratedValue;
|
||||
import jakarta.persistence.Id;
|
||||
import jakarta.persistence.OneToOne;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Entity
|
||||
@Getter @Setter
|
||||
public class KSDocument {
|
||||
|
||||
|
||||
@GeneratedValue
|
||||
private @Id Long id;
|
||||
|
||||
private String name;
|
||||
private String description;
|
||||
|
||||
private String filePath;
|
||||
private String fileName;
|
||||
|
||||
@OneToOne
|
||||
private KSIngestionInfo ingestionInfo;
|
||||
|
||||
private String ingestionStatus;
|
||||
|
||||
public Long getId() {
|
||||
return id;
|
||||
}
|
||||
public void setId(Long id) {
|
||||
this.id = id;
|
||||
}
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
public void setDescription(String description) {
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
public void setFilePath(String filePath) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
public String getFileName() {
|
||||
return fileName;
|
||||
}
|
||||
public void setFileName(String fileName) {
|
||||
this.fileName = fileName;
|
||||
}
|
||||
public KSIngestionInfo getIngestionInfo() {
|
||||
return ingestionInfo;
|
||||
}
|
||||
public void setIngestionInfo(KSIngestionInfo ingestionInfo) {
|
||||
this.ingestionInfo = ingestionInfo;
|
||||
}
|
||||
|
||||
public String getIngestionStatus() {
|
||||
return ingestionStatus;
|
||||
}
|
||||
public void setIngestionStatus(String ingestionStatus) {
|
||||
this.ingestionStatus = ingestionStatus;
|
||||
}
|
||||
|
||||
}
|
||||
31
src/main/java/com/olympus/apollo/models/KSIngestionInfo.java
Normal file
31
src/main/java/com/olympus/apollo/models/KSIngestionInfo.java
Normal file
@@ -0,0 +1,31 @@
|
||||
package com.olympus.apollo.models;
|
||||
|
||||
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.GeneratedValue;
|
||||
import jakarta.persistence.Id;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Entity
|
||||
@Getter @Setter
|
||||
public class KSIngestionInfo {
|
||||
|
||||
@GeneratedValue
|
||||
private @Id Long id;
|
||||
|
||||
private String ingestionMessage;
|
||||
private String ingestionDate;
|
||||
|
||||
private String vdbIndex;
|
||||
|
||||
private String type;
|
||||
|
||||
private String metadata;
|
||||
|
||||
private int minChunkSizeToEmbed;
|
||||
private int maxNumberOfChunks;
|
||||
private int minChunkSize;
|
||||
private int defaultChunkSize;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
package com.olympus.apollo.repository;
|
||||
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.data.repository.CrudRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import com.olympus.apollo.models.KSDocument;
|
||||
|
||||
@Repository
|
||||
public interface KSDocumentRepository extends CrudRepository<KSDocument, Long> {
|
||||
|
||||
public Iterable<KSDocument> findAllByIngestionStatus(String status);
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.olympus.apollo.repository;
|
||||
|
||||
import org.springframework.data.repository.CrudRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import com.olympus.apollo.models.KSIngestionInfo;
|
||||
|
||||
@Repository
|
||||
public interface KSIngestionInfoRepository extends CrudRepository<KSIngestionInfo, Long> {
|
||||
|
||||
}
|
||||
@@ -0,0 +1,114 @@
|
||||
package com.olympus.apollo.services;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.core.io.UrlResource;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.FileSystemUtils;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
@Service
|
||||
public class FileSystemStorageService implements StorageService {
|
||||
|
||||
private final Path rootLocation;
|
||||
|
||||
@Autowired
|
||||
public FileSystemStorageService(StorageProperties properties) {
|
||||
|
||||
if(properties.getLocation().trim().length() == 0){
|
||||
throw new StorageException("File upload location can not be Empty.");
|
||||
}
|
||||
|
||||
this.rootLocation = Paths.get(properties.getLocation());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String store(MultipartFile file) {
|
||||
String destinationFileString=null;
|
||||
|
||||
try {
|
||||
if (file.isEmpty()) {
|
||||
throw new StorageException("Failed to store empty file.");
|
||||
}
|
||||
Path destinationFile = this.rootLocation.resolve(
|
||||
Paths.get(file.getOriginalFilename()))
|
||||
.normalize().toAbsolutePath();
|
||||
if (!destinationFile.getParent().equals(this.rootLocation.toAbsolutePath())) {
|
||||
// This is a security check
|
||||
throw new StorageException(
|
||||
"Cannot store file outside current directory.");
|
||||
}
|
||||
try (InputStream inputStream = file.getInputStream()) {
|
||||
Files.copy(inputStream, destinationFile,
|
||||
StandardCopyOption.REPLACE_EXISTING);
|
||||
destinationFileString=destinationFile.toString();
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new StorageException("Failed to store file.", e);
|
||||
}
|
||||
return destinationFileString;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<Path> loadAll() {
|
||||
try {
|
||||
return Files.walk(this.rootLocation, 1)
|
||||
.filter(path -> !path.equals(this.rootLocation))
|
||||
.map(this.rootLocation::relativize);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new StorageException("Failed to read stored files", e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path load(String filename) {
|
||||
return rootLocation.resolve(filename);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Resource loadAsResource(String filename) {
|
||||
try {
|
||||
Path file = load(filename);
|
||||
Resource resource = new UrlResource(file.toUri());
|
||||
if (resource.exists() || resource.isReadable()) {
|
||||
return resource;
|
||||
}
|
||||
else {
|
||||
throw new StorageFileNotFoundException(
|
||||
"Could not read file: " + filename);
|
||||
|
||||
}
|
||||
}
|
||||
catch (MalformedURLException e) {
|
||||
throw new StorageFileNotFoundException("Could not read file: " + filename, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteAll() {
|
||||
FileSystemUtils.deleteRecursively(rootLocation.toFile());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
try {
|
||||
Files.createDirectories(rootLocation);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new StorageException("Could not initialize storage", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
126
src/main/java/com/olympus/apollo/services/KSIngestor.java
Normal file
126
src/main/java/com/olympus/apollo/services/KSIngestor.java
Normal file
@@ -0,0 +1,126 @@
|
||||
package com.olympus.apollo.services;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import org.codelibs.jhighlight.fastutil.Hash;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.reader.tika.TikaDocumentReader;
|
||||
import org.springframework.ai.transformer.KeywordMetadataEnricher;
|
||||
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
||||
import org.springframework.ai.vectorstore.SearchRequest;
|
||||
import org.springframework.ai.vectorstore.VectorStore;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.olympus.apollo.models.KSIngestionInfo;
|
||||
import com.olympus.apollo.repository.KSDocumentRepository;
|
||||
|
||||
|
||||
@Service
|
||||
public class KSIngestor {
|
||||
|
||||
@Autowired
|
||||
private KSDocumentRepository ksDocumentRepository;
|
||||
@Autowired
|
||||
private FileSystemStorageService storageService;
|
||||
|
||||
@Autowired
|
||||
private VectorStore vectorStore;
|
||||
|
||||
Logger logger = LoggerFactory.getLogger(KSIngestor.class);
|
||||
|
||||
public void deleteAll(String document_file_name) {
|
||||
List<Document> docToDelete = vectorStore.similaritySearch(SearchRequest.defaults().withQuery("*")
|
||||
.withSimilarityThreshold(0.0)
|
||||
.withFilterExpression("'source'=='3-automated-test-framework---atf.md'"));
|
||||
|
||||
logger.info("Number of documents to delete: " + docToDelete.size());
|
||||
}
|
||||
|
||||
public void ingestLoop() {
|
||||
|
||||
|
||||
ksDocumentRepository.findAllByIngestionStatus("NEW").forEach(ksDocument -> {
|
||||
// ingest the document
|
||||
ksDocument.setIngestionStatus("IN PROGRESS");
|
||||
ksDocumentRepository.save(ksDocument);
|
||||
|
||||
Resource file = storageService.loadAsResource(ksDocument.getFilePath());
|
||||
TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(file);
|
||||
|
||||
List<Document> docs = tikaDocumentReader.read();
|
||||
|
||||
logger.info("Ingested document: " + ksDocument.getFilePath());
|
||||
logger.info("Number of documents: " + docs.size());
|
||||
|
||||
KSIngestionInfo ingestionInfo = ksDocument.getIngestionInfo();
|
||||
|
||||
|
||||
TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(),
|
||||
ingestionInfo.getMinChunkSize(),
|
||||
ingestionInfo.getMinChunkSizeToEmbed(),
|
||||
ingestionInfo.getMaxNumberOfChunks(),
|
||||
true);
|
||||
|
||||
|
||||
docs.forEach(doc -> {
|
||||
List<Document> splitDocs = splitter.split(doc);
|
||||
|
||||
logger.info("Number of documents: " + splitDocs.size());
|
||||
for (Document splitDoc : splitDocs) {
|
||||
splitDoc.getMetadata().putAll(getMetadata(ingestionInfo));
|
||||
}
|
||||
embedDocuments(splitDocs, ingestionInfo);
|
||||
});
|
||||
|
||||
ksDocument.setIngestionStatus("NEW");
|
||||
ksDocumentRepository.save(ksDocument);
|
||||
|
||||
});
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void embedDocuments(List<Document> docs, KSIngestionInfo ingestionInfo) {
|
||||
|
||||
logger.info("Embedding documents");
|
||||
|
||||
vectorStore.add(docs);
|
||||
logger.info("Documents embedded");
|
||||
}
|
||||
|
||||
public List<String> testSimilaritySearch(String query,String filter_doc_type) {
|
||||
List<Document> docs = vectorStore.similaritySearch(
|
||||
SearchRequest.defaults()
|
||||
.withQuery(query)
|
||||
.withTopK(5).withSimilarityThreshold(0.8)
|
||||
.withFilterExpression("'ks_document_type'=='"+filter_doc_type+"'"));
|
||||
|
||||
List<String> result = new ArrayList<String>();
|
||||
for (Document doc : docs) {
|
||||
result.add(doc.getContent());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
private HashMap<String, String> getMetadata(KSIngestionInfo ingestionInfo) {
|
||||
HashMap<String, String> metadata = new HashMap<String, String>();
|
||||
|
||||
String[] metadatas = ingestionInfo.getMetadata().split(";");
|
||||
for (String meta : metadatas) {
|
||||
String[] keyValue = meta.split(":");
|
||||
metadata.put(keyValue[0], keyValue[1]);
|
||||
}
|
||||
|
||||
return metadata;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.olympus.apollo.services;
|
||||
|
||||
/**
 * Unchecked exception signalling that a storage operation failed.
 */
public class StorageException extends RuntimeException {

    /**
     * Creates an exception that wraps the underlying failure.
     *
     * @param message description of what went wrong
     * @param cause   the exception that triggered this one
     */
    public StorageException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception without an underlying cause.
     *
     * @param message description of what went wrong
     */
    public StorageException(String message) {
        super(message);
    }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.olympus.apollo.services;
|
||||
|
||||
public class StorageFileNotFoundException extends StorageException {
|
||||
|
||||
public StorageFileNotFoundException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public StorageFileNotFoundException(String message, Throwable cause) {
|
||||
super(message, cause);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.olympus.apollo.services;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
@ConfigurationProperties("storage")
|
||||
public class StorageProperties {
|
||||
/**
|
||||
* Folder location for storing files
|
||||
*/
|
||||
private String location = "/Users/andreaterzani/Desktop/dev/olympus/upload-dir";
|
||||
|
||||
public String getLocation() {
|
||||
return location;
|
||||
}
|
||||
|
||||
public void setLocation(String location) {
|
||||
this.location = location;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.olympus.apollo.services;
|
||||
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.core.io.Resource;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public interface StorageService {
|
||||
|
||||
void init();
|
||||
|
||||
String store(MultipartFile file);
|
||||
|
||||
Stream<Path> loadAll();
|
||||
|
||||
Path load(String filename);
|
||||
|
||||
Resource loadAsResource(String filename);
|
||||
|
||||
void deleteAll();
|
||||
}
|
||||
Reference in New Issue
Block a user