diff --git a/pom.xml b/pom.xml
index 6717668..1c685a1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -90,6 +90,14 @@
lombok
1.18.34
+
+
+
+ org.eclipse.jgit
+ org.eclipse.jgit
+ 6.8.0.202311291450-r
+
+
@@ -133,6 +141,10 @@
false
+
+ jgit-repository
+ https://repo.eclipse.org/content/groups/releases/
+
diff --git a/src/main/java/com/olympus/apollo/controllers/FeApi/KsDocumentController.java b/src/main/java/com/olympus/apollo/controllers/FeApi/KsDocumentController.java
index 3a46eba..ccb94c7 100644
--- a/src/main/java/com/olympus/apollo/controllers/FeApi/KsDocumentController.java
+++ b/src/main/java/com/olympus/apollo/controllers/FeApi/KsDocumentController.java
@@ -30,9 +30,22 @@ public class KsDocumentController {
return result;
}
+    /**
+     * Returns the document whose id is given in the {@code {id}} path segment.
+     * The id is bound with {@code @PathVariable}: {@code @RequestParam} would read
+     * a {@code ?id=} query parameter and leave the path segment unused.
+     *
+     * @param id document id taken from the URL path
+     * @return the document returned by the repository for that id
+     * @throws org.springframework.web.server.ResponseStatusException with status
+     *         404 when the repository has no document for {@code id}
+     */
@GetMapping("/{id}")
- public List getDocument(@RequestParam String id) {
+    public KSDocument getDocument(@org.springframework.web.bind.annotation.PathVariable String id) {
- List result = (List) ksDocumentREpository.findAll();
+        // A bare Optional.get() turns a missing id into an unhandled NoSuchElementException.
+        KSDocument result = ksDocumentREpository.findById(id)
+                .orElseThrow(() -> new org.springframework.web.server.ResponseStatusException(
+                        org.springframework.http.HttpStatus.NOT_FOUND, "KSDocument not found: " + id));
return result;
}
diff --git a/src/main/java/com/olympus/apollo/controllers/TestController.java b/src/main/java/com/olympus/apollo/controllers/TestController.java
index 1c2aed2..f5159f1 100644
--- a/src/main/java/com/olympus/apollo/controllers/TestController.java
+++ b/src/main/java/com/olympus/apollo/controllers/TestController.java
@@ -1,10 +1,13 @@
package com.olympus.apollo.controllers;
+import java.util.HashMap;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RestController;
+import com.olympus.apollo.models.KSIngestionInfo;
+import com.olympus.apollo.services.GitRepositoryIngestor;
import com.olympus.apollo.services.KSIngestor;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
@@ -16,7 +19,9 @@ public class TestController {
@Autowired
KSIngestor ksIngestor;
-
+ @Autowired
+ GitRepositoryIngestor gitRepositoryIngestor;
+
@GetMapping("test/ingestion_loop")
public String testIngestionLoop() {
ksIngestor.ingestLoop();
@@ -34,5 +39,40 @@ public class TestController {
return "Deleted";
}
+    /**
+     * Test endpoint that ingests the Git repository at a fixed local path with
+     * fixed metadata and chunking settings.
+     *
+     * @return {@code "Ingested"} on success, {@code "Error"} when ingestion throws
+     */
+    @GetMapping("test/ingest_repo")
+    public String ingestRepo() {
+        // NOTE(review): developer-machine path; the endpoint only works where this
+        // directory exists. Consider reading it from configuration.
+        String repoPath = "C:\\Users\\andrea.terzani\\dev\\DOO2_CLOUD";
+        try {
+            KSIngestionInfo ksIngestionInfo = new KSIngestionInfo();
+
+            // NOTE(review): "KsApplicatioName" and "KsDoSource" look misspelled; kept
+            // as-is because other code may look these keys up by their exact names.
+            HashMap metadata = new HashMap<>();
+            metadata.put("KsApplicatioName", "doo");
+            metadata.put("KsDoctype", "sourcecode");
+            metadata.put("KsDoSource", "GIT");
+            ksIngestionInfo.setMetadata(metadata);
+            ksIngestionInfo.setDefaultChunkSize(6000);
+            ksIngestionInfo.setMinChunkSize(200);
+            ksIngestionInfo.setMaxNumberOfChunks(10000);
+            ksIngestionInfo.setMinChunkSizeToEmbed(100);
+
+            gitRepositoryIngestor.ingestGitRepository(repoPath, ksIngestionInfo);
+            return "Ingested";
+        } catch (Exception e) {
+            // The response stays "Error", but the cause is logged instead of discarded.
+            java.util.logging.Logger.getLogger(TestController.class.getName())
+                    .log(java.util.logging.Level.SEVERE, "Git repository ingestion failed: " + repoPath, e);
+            return "Error";
+        }
+    }
}
diff --git a/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java b/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java
new file mode 100644
index 0000000..36920f8
--- /dev/null
+++ b/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java
@@ -0,0 +1,155 @@
+package com.olympus.apollo.services;
+
+import org.eclipse.jgit.api.Git;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.treewalk.TreeWalk;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.transformer.splitter.TokenTextSplitter;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.stereotype.Service;
+
+import com.olympus.apollo.models.KSIngestionInfo;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Ingests the Java sources of a local Git repository into a {@link VectorStore}:
+ * files are read from the tree of the most recent commit, wrapped as
+ * {@link Document}s with metadata, split into chunks and added to the store.
+ */
+@Service
+public class GitRepositoryIngestor {
+
+    /**
+     * Captures the identifier that follows the {@code class} keyword. The leading
+     * word boundary stops words that merely end in "class" (for example "subclass")
+     * from matching. This is a text heuristic: matches inside comments or string
+     * literals are not filtered out.
+     */
+    private static final Pattern CLASS_NAME_PATTERN = Pattern.compile("\\bclass\\s+(\\w+)");
+
+    private final VectorStore vectorStore;
+
+    /**
+     * @param vectorStore store that receives the chunked documents
+     */
+    public GitRepositoryIngestor(VectorStore vectorStore) {
+        this.vectorStore = vectorStore;
+    }
+
+    /**
+     * Reads every relevant file from the tree of the first commit returned by
+     * {@code git log}, splits the contents with a {@link TokenTextSplitter} built
+     * from {@code ingestionInfo} and adds the resulting chunks to the vector store.
+     * Nothing is sent to the store when no relevant file is found.
+     *
+     * @param repoPath      file-system path handed to {@link Git#open(File)}
+     * @param ingestionInfo chunking settings plus optional metadata that is copied
+     *                      onto every document (overriding extracted keys)
+     * @throws Exception if the repository cannot be opened or read, or if
+     *                   splitting or storing fails
+     */
+    public void ingestGitRepository(String repoPath, KSIngestionInfo ingestionInfo) throws Exception {
+        try (Git git = Git.open(new File(repoPath))) {
+            Repository repository = git.getRepository();
+            RevCommit latestCommit = git.log().setMaxCount(1).call().iterator().next();
+
+            List<Document> documents = new ArrayList<>();
+            try (TreeWalk treeWalk = new TreeWalk(repository)) {
+                treeWalk.addTree(latestCommit.getTree());
+                treeWalk.setRecursive(true);
+
+                while (treeWalk.next()) {
+                    String fileName = treeWalk.getNameString();
+                    if (!isRelevantFile(fileName)) {
+                        continue;
+                    }
+
+                    byte[] fileContent = repository.open(treeWalk.getObjectId(0)).getBytes();
+                    String fileContentStr = new String(fileContent, StandardCharsets.UTF_8);
+
+                    Document doc = new Document(fileContentStr);
+                    Map<String, Object> docMetadata = doc.getMetadata();
+                    docMetadata.putAll(extractMetadata(fileName, fileContentStr));
+                    docMetadata.put("filePath", treeWalk.getPathString());
+                    docMetadata.put("fileName", fileName);
+                    // Caller-supplied metadata is optional; when present it is applied
+                    // last so it wins over the extracted keys.
+                    if (ingestionInfo.getMetadata() != null) {
+                        docMetadata.putAll(ingestionInfo.getMetadata());
+                    }
+                    documents.add(doc);
+                }
+            }
+
+            // No matching files: skip splitting and avoid handing the store an empty batch.
+            if (documents.isEmpty()) {
+                return;
+            }
+
+            TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(),
+                    ingestionInfo.getMinChunkSize(),
+                    ingestionInfo.getMinChunkSizeToEmbed(),
+                    ingestionInfo.getMaxNumberOfChunks(),
+                    false);
+            List<Document> splitDocuments = splitter.split(documents);
+            vectorStore.add(splitDocuments);
+        }
+    }
+
+    /** Returns whether a file should be ingested; currently only {@code .java} files. */
+    private boolean isRelevantFile(String fileName) {
+        // Add more relevant file extensions as needed
+        return fileName.endsWith(".java");
+    }
+
+    /** Dispatches to the language-specific extractor chosen by file extension. */
+    private Map<String, Object> extractMetadata(String fileName, String fileContent) {
+        Map<String, Object> metadata = new HashMap<>();
+
+        if (fileName.endsWith(".java")) {
+            metadata.putAll(extractJavaMetadata(fileContent));
+        } else if (fileName.endsWith(".py")) {
+            metadata.putAll(extractPythonMetadata(fileContent));
+        } else if (fileName.endsWith(".js")) {
+            metadata.putAll(extractJavaScriptMetadata(fileContent));
+        }
+
+        return metadata;
+    }
+
+    /**
+     * Collects the class names found by {@link #CLASS_NAME_PATTERN} into a single
+     * comma-separated {@code classNames} entry (empty string when none match).
+     */
+    private Map<String, Object> extractJavaMetadata(String fileContent) {
+        List<String> classNames = new ArrayList<>();
+        Matcher classMatcher = CLASS_NAME_PATTERN.matcher(fileContent);
+        while (classMatcher.find()) {
+            classNames.add(classMatcher.group(1));
+        }
+
+        Map<String, Object> metadata = new HashMap<>();
+        metadata.put("classNames", String.join(",", classNames));
+        return metadata;
+    }
+
+    /** Placeholder: Python-specific metadata extraction is not implemented yet. */
+    private Map<String, Object> extractPythonMetadata(String fileContent) {
+        return new HashMap<>();
+    }
+
+    /** Placeholder: JavaScript-specific metadata extraction is not implemented yet. */
+    private Map<String, Object> extractJavaScriptMetadata(String fileContent) {
+        return new HashMap<>();
+    }
+}