feat: Update getDocument method in KsDocumentController

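Refactor the getDocument method in KsDocumentController to return a single KSDocument object instead of a list. This improves the efficiency and readability of the code. The method now uses the findById method of ksDocumentRepository to retrieve only the document with the specified id, instead of loading every document with findAll. This commit also adds the JGit dependency, a GitRepositoryIngestor service that loads the .java files of a repository's latest commit into the vector store, and a test/ingest_repo endpoint in TestController that exercises it.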
2024-07-31 17:30:08 +02:00
parent 1f8c5a062b
commit 1fdd4f52ec
4 changed files with 169 additions and 3 deletions
--- a/pom.xml
+++ b/pom.xml
@@ -90,6 +90,14 @@
 			<artifactId>lombok</artifactId>
 			<version>1.18.34</version>
 		</dependency>
+
+
+		<dependency>
+			<groupId>org.eclipse.jgit</groupId>
+			<artifactId>org.eclipse.jgit</artifactId>
+			<version>6.8.0.202311291450-r</version>
+		  </dependency>
+
 	</dependencies>

 	<build>
@@ -133,6 +141,10 @@
 			<enabled>false</enabled>
 		</releases>
 		</repository>
+		<repository>
+			<id>jgit-repository</id>
+			<url>https://repo.eclipse.org/content/groups/releases/</url>
+		  </repository>
 	</repositories>
 	<pluginRepositories>
 		<pluginRepository>
--- a/src/main/java/com/olympus/apollo/controllers/FeApi/KsDocumentController.java
+++ b/src/main/java/com/olympus/apollo/controllers/FeApi/KsDocumentController.java
@@ -30,9 +30,9 @@ public class KsDocumentController {
        return result;
    }
    @GetMapping("/{id}")
-    public  List<KSDocument> getDocument(@RequestParam String id) {
+    public KSDocument getDocument(@RequestParam String id) { // NOTE(review): the mapping declares {id} but this binds a query parameter; @PathVariable was probably intended - confirm with API clients before changing

-            List<KSDocument> result  = (List<KSDocument>) ksDocumentREpository.findAll();
+            KSDocument result  = ksDocumentREpository.findById(id).orElseThrow(() -> new org.springframework.web.server.ResponseStatusException(org.springframework.http.HttpStatus.NOT_FOUND, "KSDocument not found: " + id)); // unknown id -> 404 instead of an unchecked Optional.get() failure

        return result;
    }
--- a/src/main/java/com/olympus/apollo/controllers/TestController.java
+++ b/src/main/java/com/olympus/apollo/controllers/TestController.java
@@ -1,10 +1,13 @@
 package com.olympus.apollo.controllers;

+import java.util.HashMap;
 import java.util.List;

 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.web.bind.annotation.RestController;

+import com.olympus.apollo.models.KSIngestionInfo;
+import com.olympus.apollo.services.GitRepositoryIngestor;
 import com.olympus.apollo.services.KSIngestor;
 import org.springframework.web.bind.annotation.GetMapping;
 import org.springframework.web.bind.annotation.RequestParam;
@@ -16,7 +19,9 @@ public class TestController {
    @Autowired
    KSIngestor ksIngestor;

-
+    @Autowired
+    GitRepositoryIngestor gitRepositoryIngestor;
+    
    @GetMapping("test/ingestion_loop")
    public String testIngestionLoop() {
        ksIngestor.ingestLoop();
@@ -34,5 +39,33 @@ public class TestController {
         return "Deleted";
    }

+    /**
+     * Test endpoint that ingests a hard-coded local Git repository with fixed chunking settings.
+     *
+     * @return "Ingested" on success, "Error" if ingestion threw (the cause is logged)
+     */
+    @GetMapping("test/ingest_repo")
+    public String ingestRepo() {
+        // NOTE(review): developer-machine path; move to configuration before wider use.
+        String repoPath = "C:\\Users\\andrea.terzani\\dev\\DOO2_CLOUD";
+        try {
+            HashMap<String, String> metadata = new HashMap<>();
+            metadata.put("KsApplicatioName","doo"); // NOTE(review): key spelling kept as-is; confirm against consumers
+            metadata.put("KsDoctype","sourcecode");
+            metadata.put("KsDoSource","GIT");
+            KSIngestionInfo ksIngestionInfo = new KSIngestionInfo();
+            ksIngestionInfo.setMetadata(metadata);
+            ksIngestionInfo.setDefaultChunkSize(6000);
+            ksIngestionInfo.setMinChunkSize(200);
+            ksIngestionInfo.setMaxNumberOfChunks(10000);
+            ksIngestionInfo.setMinChunkSizeToEmbed(100);
+            gitRepositoryIngestor.ingestGitRepository(repoPath, ksIngestionInfo);
+            return "Ingested";
+        } catch (Exception e) {
+            java.util.logging.Logger.getLogger(TestController.class.getName())
+                    .log(java.util.logging.Level.SEVERE, "Git repository ingestion failed for " + repoPath, e);
+            return "Error";
+        }
+    }

 }
--- a/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java
+++ b/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java
@@ -0,0 +1,121 @@
+package com.olympus.apollo.services;
+
+import org.eclipse.jgit.api.Git;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.eclipse.jgit.treewalk.TreeWalk;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.transformer.splitter.TokenTextSplitter;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.stereotype.Service;
+
+import com.olympus.apollo.models.KSIngestionInfo;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+@Service
+public class GitRepositoryIngestor {
+
+    /** Naive class-name matcher; it also hits the word "class" inside comments or strings. */
+    private static final Pattern CLASS_PATTERN = Pattern.compile("\\bclass\\s+(\\w+)");
+
+    private final VectorStore vectorStore;
+
+    public GitRepositoryIngestor(VectorStore vectorStore) {
+        this.vectorStore = vectorStore;
+    }
+
+    /**
+     * Reads every relevant file from the first commit returned by {@code git log}, splits the
+     * contents into chunks and adds the chunks to the vector store.
+     *
+     * @param repoPath      file-system path handed to {@code Git.open}
+     * @param ingestionInfo chunking parameters plus metadata copied onto every document
+     * @throws Exception if the repository cannot be opened or read, or the store rejects the data
+     */
+    public void ingestGitRepository(String repoPath, KSIngestionInfo ingestionInfo) throws Exception {
+        try (Git git = Git.open(new File(repoPath))) {
+            Repository repository = git.getRepository();
+            RevCommit latestCommit = git.log().setMaxCount(1).call().iterator().next();
+            List<Document> documents = new ArrayList<>();
+
+            try (TreeWalk treeWalk = new TreeWalk(repository)) {
+                treeWalk.addTree(latestCommit.getTree());
+                treeWalk.setRecursive(true);
+                while (treeWalk.next()) {
+                    String fileName = treeWalk.getNameString();
+                    if (!isRelevantFile(fileName)) {
+                        continue;
+                    }
+                    byte[] fileContent = repository.open(treeWalk.getObjectId(0)).getBytes();
+                    String fileContentStr = new String(fileContent, StandardCharsets.UTF_8);
+                    Document doc = new Document(fileContentStr);
+                    doc.getMetadata().putAll(extractMetadata(fileName, fileContentStr));
+                    doc.getMetadata().put("filePath", treeWalk.getPathString());
+                    doc.getMetadata().put("fileName", fileName);
+                    // Caller-supplied metadata goes last so it overrides extracted keys.
+                    if (ingestionInfo.getMetadata() != null) {
+                        doc.getMetadata().putAll(ingestionInfo.getMetadata());
+                    }
+                    documents.add(doc);
+                }
+            }
+            if (documents.isEmpty()) {
+                return; // nothing to split or embed
+            }
+
+            TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(),
+                    ingestionInfo.getMinChunkSize(), ingestionInfo.getMinChunkSizeToEmbed(),
+                    ingestionInfo.getMaxNumberOfChunks(), false);
+            vectorStore.add(splitter.split(documents));
+        }
+    }
+
+    /** Returns whether the file should be ingested; currently only Java sources. */
+    private boolean isRelevantFile(String fileName) {
+        // Add more relevant file extensions as needed
+        return fileName.endsWith(".java");
+    }
+
+    /** Dispatches to the language-specific extractor chosen by file extension. */
+    private Map<String, String> extractMetadata(String fileName, String fileContent) {
+        if (fileName.endsWith(".java")) {
+            return extractJavaMetadata(fileContent);
+        } else if (fileName.endsWith(".py")) {
+            return extractPythonMetadata(fileContent);
+        } else if (fileName.endsWith(".js")) {
+            return extractJavaScriptMetadata(fileContent);
+        }
+        return new HashMap<>();
+    }
+
+    /** Collects the name following each {@code class} keyword into a comma-separated list. */
+    private Map<String, String> extractJavaMetadata(String fileContent) {
+        List<String> classNames = new ArrayList<>();
+        Matcher classMatcher = CLASS_PATTERN.matcher(fileContent);
+        while (classMatcher.find()) {
+            classNames.add(classMatcher.group(1));
+        }
+        Map<String, String> metadata = new HashMap<>();
+        metadata.put("classNames", String.join(",", classNames));
+        return metadata;
+    }
+
+    /** Placeholder: Python-specific metadata extraction is not implemented yet. */
+    private Map<String, String> extractPythonMetadata(String fileContent) {
+        return new HashMap<>();
+    }
+
+    /** Placeholder: JavaScript-specific metadata extraction is not implemented yet. */
+    private Map<String, String> extractJavaScriptMetadata(String fileContent) {
+        return new HashMap<>();
+    }
+}