From 35bd7e3cf5bb5b574b9c44fdb28569ac79d4b083 Mon Sep 17 00:00:00 2001 From: sumedh Date: Tue, 27 Aug 2024 17:25:22 +0530 Subject: [PATCH] git clone functionality added --- .../controllers/FeApi/KSGitController.java | 66 +- .../apollo/controllers/TestController.java | 24 +- .../com/olympus/apollo/dto/GitCloneInput.java | 23 + .../olympus/apollo/dto/GitCloneOutput.java | 10 + .../com/olympus/apollo/dto/GitPullOutput.java | 13 + .../com/olympus/apollo/models/KSGitInfo.java | 2 + .../repository/KSGitInfoRepository.java | 6 + .../repository/VectorStoreRepository.java | 5 + .../services/GitRepositoryIngestor.java | 733 +++++++++++------- .../olympus/apollo/services/GitService.java | 204 +++++ src/main/resources/application.properties | 6 +- 11 files changed, 787 insertions(+), 305 deletions(-) create mode 100644 src/main/java/com/olympus/apollo/dto/GitCloneInput.java create mode 100644 src/main/java/com/olympus/apollo/dto/GitCloneOutput.java create mode 100644 src/main/java/com/olympus/apollo/dto/GitPullOutput.java create mode 100644 src/main/java/com/olympus/apollo/services/GitService.java diff --git a/src/main/java/com/olympus/apollo/controllers/FeApi/KSGitController.java b/src/main/java/com/olympus/apollo/controllers/FeApi/KSGitController.java index 43242dc..cd0008f 100644 --- a/src/main/java/com/olympus/apollo/controllers/FeApi/KSGitController.java +++ b/src/main/java/com/olympus/apollo/controllers/FeApi/KSGitController.java @@ -1,28 +1,24 @@ package com.olympus.apollo.controllers.FeApi; +import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.List; +import com.olympus.apollo.dto.*; +import com.olympus.apollo.services.GitService; +import org.eclipse.jgit.api.errors.GitAPIException; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.CrossOrigin; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.bind.annotation.*; -import com.olympus.apollo.dto.KSGitInfoDTO; -import com.olympus.apollo.dto.KSGitUploadDTO; import com.olympus.apollo.models.KSGitInfo; import com.olympus.apollo.models.KSGitIngestionInfo; import com.olympus.apollo.repository.KSGitInfoRepository; import com.olympus.apollo.repository.KSGitIngestionInfoRepository; import com.olympus.apollo.services.KSGitInfoService; -import io.swagger.v3.oas.annotations.parameters.RequestBody; - @CrossOrigin(origins = "http://localhost:5173") @RestController @RequestMapping("/fe-api/ks_git_repos") @@ -33,6 +29,9 @@ public class KSGitController { @Autowired private KSGitIngestionInfoRepository ksGitIngestionInfoRepository; + @Autowired + private GitService gitService; + @GetMapping("") public List listGitInfo() { List result = (List) ksGitInfoRepository.findAll(); @@ -75,4 +74,53 @@ public class KSGitController { return ResponseEntity.ok("Upload successful"); } + //clone the master branch from remote repository + @PostMapping("/clone") + public GitCloneOutput gitClone(@RequestBody GitCloneInput gitCloneInput) throws GitAPIException, IOException { + KSGitInfo ksGitInfo = new KSGitInfo(); + ksGitInfo.setRepoName(gitCloneInput.getRepoName()); + ksGitInfo.setBranch(gitCloneInput.getBranch()); + ksGitInfo.setCommitId(gitCloneInput.getCommitId()); + ksGitInfo.setRepoPath(gitCloneInput.getRepoPath()); + ksGitInfo.setIngestionStatus("NEW"); + ksGitInfo.setIngestionDate(new Date()); + ksGitInfo.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date())); + + KSGitIngestionInfo ksGitIngestionInfo = new KSGitIngestionInfo(); + HashMap metadata = new HashMap<>(); + + metadata.put("KsApplicationName", gitCloneInput.getRepoName()); + metadata.put("KsDoctype", "gitrepository"); + metadata.put("KsDocSource", "gitlab"); + metadata.put("KsFileSource", gitCloneInput.getRepoName()); + + metadata.put("KsBranch", gitCloneInput.getBranch()); + metadata.put("KsRepoName", gitCloneInput.getRepoName()); + + ksGitIngestionInfo.setMetadata(metadata); + ksGitIngestionInfo.setMinChunkSizeToEmbed(gitCloneInput.getMinChunkSizeToEmbed()); + ksGitIngestionInfo.setMaxNumberOfChunks(gitCloneInput.getMaxNumberOfChunks()); + ksGitIngestionInfo.setMinChunkSize(gitCloneInput.getMinChunkSize()); + ksGitIngestionInfo.setDefaultChunkSize(gitCloneInput.getDefaultChunkSize()); + + ksGitIngestionInfoRepository.save(ksGitIngestionInfo); + ksGitInfo.setKsGitIngestionInfo(ksGitIngestionInfo); + ksGitInfoRepository.save(ksGitInfo); + + return gitService.cloneRepository(gitCloneInput.getSource(),gitCloneInput.getRepoName(),gitCloneInput.getGroup(),gitCloneInput.getTokenType()); + } + /* + curl --location 'http://localhost:8082/fe-api/ks_git_repos/clone' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer ' \ + --data '{"repoName":"shellExecutionThroughAPI","group":"automationtester23","source":"https://github.com","tokenType":"github","branch":"master","commitId":"dummy","repoPath":"C:\\repos\\olympus_ai\\gitClone","minChunkSizeToEmbed":20,"maxNumberOfChunks":1988,"minChunkSize":200,"defaultChunkSize":1988} +' + */ + + //pull latest changes from master branch + @GetMapping("/pullchanges/{repoName}") + public GitPullOutput gitPull(@PathVariable String repoName){ + return gitService.pullChanges(repoName); + } + } diff --git a/src/main/java/com/olympus/apollo/controllers/TestController.java b/src/main/java/com/olympus/apollo/controllers/TestController.java index 732c98f..e209545 100644 --- a/src/main/java/com/olympus/apollo/controllers/TestController.java +++ b/src/main/java/com/olympus/apollo/controllers/TestController.java @@ -2,6 +2,7 @@ package com.olympus.apollo.controllers; import java.util.List; +import com.olympus.apollo.services.GitService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -27,6 +28,9 @@ public class TestController { @Autowired GitRepositoryIngestor gitRepositoryIngestor; + @Autowired + GitService gitService; + private static final Logger logger = LoggerFactory.getLogger(TestController.class); @GetMapping("test/ingestion_loop") @@ -64,6 +68,25 @@ public class TestController { } } + @GetMapping("test/reingest_repo/{repoName}") + public ResponseEntity ReIngestRepo(@PathVariable String repoName) { + try { + gitService.pullChanges(repoName); + gitRepositoryIngestor.ReIngestGitRepository(repoName); + return ResponseEntity.ok("Ingestion Started"); + } catch (Exception e) { + + logger.error("Error during ingestion start", e); + + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR) + .body("Error starting ingestion: " + e.getMessage()); + } + } + /* + curl --location 'http://localhost:8082/test/reingest_repo/shellExecutionThroughAPI' \ + --header 'Authorization: Bearer ' + */ + @GetMapping("test/check_ingestion_status/{repoName}") public ResponseEntity checkIngestionStatus(@PathVariable String repoName) { try { @@ -79,5 +102,4 @@ public class TestController { return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorOutput); } } - } diff --git a/src/main/java/com/olympus/apollo/dto/GitCloneInput.java b/src/main/java/com/olympus/apollo/dto/GitCloneInput.java new file mode 100644 index 0000000..1751615 --- /dev/null +++ b/src/main/java/com/olympus/apollo/dto/GitCloneInput.java @@ -0,0 +1,23 @@ +package com.olympus.apollo.dto; + +import lombok.Getter; +import lombok.Setter; + +import java.util.HashMap; + +@Setter +@Getter +public class GitCloneInput { + private String source; + private String repoName; + private String group; + private String tokenType; + + private String branch; + private String commitId; + private String repoPath; + private int minChunkSizeToEmbed; + private int maxNumberOfChunks; + private int minChunkSize; + private int defaultChunkSize; +} diff --git a/src/main/java/com/olympus/apollo/dto/GitCloneOutput.java b/src/main/java/com/olympus/apollo/dto/GitCloneOutput.java new file mode 100644 index 0000000..92bf879 --- /dev/null +++ b/src/main/java/com/olympus/apollo/dto/GitCloneOutput.java @@ -0,0 +1,10 @@ +package com.olympus.apollo.dto; + +import lombok.Getter; +import lombok.Setter; + +@Setter @Getter +public class GitCloneOutput { + private String message; + private String repoName; +} diff --git a/src/main/java/com/olympus/apollo/dto/GitPullOutput.java b/src/main/java/com/olympus/apollo/dto/GitPullOutput.java new file mode 100644 index 0000000..d274957 --- /dev/null +++ b/src/main/java/com/olympus/apollo/dto/GitPullOutput.java @@ -0,0 +1,13 @@ +package com.olympus.apollo.dto; + +import lombok.Getter; +import lombok.Setter; + +import java.util.Map; + +@Setter @Getter +public class GitPullOutput { + private Map changes; + private String repoName; + private String message; +} diff --git a/src/main/java/com/olympus/apollo/models/KSGitInfo.java b/src/main/java/com/olympus/apollo/models/KSGitInfo.java index b027de4..c4d4c2f 100644 --- a/src/main/java/com/olympus/apollo/models/KSGitInfo.java +++ b/src/main/java/com/olympus/apollo/models/KSGitInfo.java @@ -1,6 +1,7 @@ package com.olympus.apollo.models; import java.util.Date; +import java.util.HashMap; import org.springframework.data.annotation.Id; import org.springframework.data.mongodb.core.mapping.Document; @@ -20,6 +21,7 @@ public class KSGitInfo { private String commitId; private String repoPath; private KSGitIngestionInfo ksGitIngestionInfo; + private HashMap gitModifiedFiles; private String ingestionStatus; private Date ingestionDate; private String ingestionDateFormat; diff --git a/src/main/java/com/olympus/apollo/repository/KSGitInfoRepository.java b/src/main/java/com/olympus/apollo/repository/KSGitInfoRepository.java index f40b017..0ff2ea7 100644 --- a/src/main/java/com/olympus/apollo/repository/KSGitInfoRepository.java +++ b/src/main/java/com/olympus/apollo/repository/KSGitInfoRepository.java @@ -1,8 +1,11 @@ package com.olympus.apollo.repository; +import java.util.List; import java.util.Optional; +import com.olympus.apollo.models.VectorStore; import org.springframework.data.mongodb.repository.MongoRepository; +import org.springframework.data.mongodb.repository.Query; import org.springframework.data.rest.core.annotation.RepositoryRestResource; import org.springframework.stereotype.Repository; import org.springframework.web.bind.annotation.CrossOrigin; @@ -12,4 +15,7 @@ import com.olympus.apollo.models.KSGitInfo; @Repository public interface KSGitInfoRepository extends MongoRepository { Optional findByRepoName(String repoName); + + @Query("{'repoName': ?0, 'ksGitIngestionInfo.metadata.KsApplicationName': ?1}") + Optional findByMetadataAndRepoName(String repoName, String KsApplicationName); } diff --git a/src/main/java/com/olympus/apollo/repository/VectorStoreRepository.java b/src/main/java/com/olympus/apollo/repository/VectorStoreRepository.java index 968437a..a999e45 100644 --- a/src/main/java/com/olympus/apollo/repository/VectorStoreRepository.java +++ b/src/main/java/com/olympus/apollo/repository/VectorStoreRepository.java @@ -8,6 +8,7 @@ import org.springframework.data.mongodb.repository.Query; import org.springframework.stereotype.Repository; import java.util.List; +import java.util.Optional; @Repository @@ -24,4 +25,8 @@ public interface VectorStoreRepository extends MongoRepository findByMetadata(String ksDoctype, String ksDocSource, String ksFileSource, String ksApplicationName); + + @Query("{'metadata.filePath': ?0}") + Optional findByFilePath(String filePath); + } diff --git a/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java b/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java index 4d75b08..7d7c5d7 100644 --- a/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java +++ b/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java @@ -1,6 +1,7 @@ package com.olympus.apollo.services; import java.io.File; +import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Date; @@ -12,7 +13,9 @@ import java.util.concurrent.CompletableFuture; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.olympus.apollo.repository.VectorStoreRepository; import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.errors.GitAPIException; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.treewalk.TreeWalk; @@ -34,18 +37,24 @@ import com.olympus.apollo.repository.KSGitInfoRepository; @Service public class GitRepositoryIngestor { - private final VectorStore vectorStore; + private final VectorStore vectorStore; - @Autowired - private KSGitInfoRepository ksGitInfoRepository; + @Value("${gitlab.path}") + private String localRepoPath; - public GitRepositoryIngestor(VectorStore vectorStore) { - this.vectorStore = vectorStore; - } + @Autowired + private KSGitInfoRepository ksGitInfoRepository; - Logger logger = LoggerFactory.getLogger(GitRepositoryIngestor.class); + @Autowired + private VectorStoreRepository vectorStoreRepository; - @Async + public GitRepositoryIngestor(VectorStore vectorStore) { + this.vectorStore = vectorStore; + } + + Logger logger = LoggerFactory.getLogger(GitRepositoryIngestor.class); + + @Async public CompletableFuture ingestGitRepository(String repo) { //String repoPath = "C:\\Users\\s.shamrao.shinde\\GenAIStorage\\" + repo + "\\"; //String repoPath = ksGitInfo.getRepoPath() + repo + "\\"; @@ -66,338 +75,474 @@ public class GitRepositoryIngestor { } - private void ingestRepo(String repo, KSGitInfo ksGitInfo) { - String repoPath = "/mnt/apollo_storage/repository" +"\\"+ repo + "\\"; - logger.info("Repository path : " + repoPath); - try (Git git = Git.open(new File(repoPath))) { - ksGitInfo.setIngestionStatus("IN PROGRESS"); + private void ingestRepo(String repo, KSGitInfo ksGitInfo) { + String repoPath = localRepoPath+"/"+ repo + "/"; + //String repoPath = "C:\\repos\\olympus_ai\\gitClone" + "\\" + repo + "\\"; + logger.info("Repository path : " + repoPath); + try (Git git = Git.open(new File(repoPath))) { + ksGitInfo.setIngestionStatus("IN PROGRESS"); - KSGitIngestionInfo ingestionInfo = ksGitInfo.getKsGitIngestionInfo(); - logger.info("Metadata : " + ingestionInfo.getMetadata()); - ksGitInfoRepository.save(ksGitInfo); + KSGitIngestionInfo ingestionInfo = ksGitInfo.getKsGitIngestionInfo(); + logger.info("Metadata : " + ingestionInfo.getMetadata()); + ksGitInfoRepository.save(ksGitInfo); - Repository repository = git.getRepository(); - RevCommit latestCommit = git.log().setMaxCount(1).call().iterator().next(); + Repository repository = git.getRepository(); + RevCommit latestCommit = git.log().setMaxCount(1).call().iterator().next(); - try (TreeWalk treeWalk = new TreeWalk(repository)) { - treeWalk.addTree(latestCommit.getTree()); - treeWalk.setRecursive(true); + try (TreeWalk treeWalk = new TreeWalk(repository)) { + treeWalk.addTree(latestCommit.getTree()); + treeWalk.setRecursive(true); - List documents = new ArrayList<>(); + List documents = new ArrayList<>(); - while (treeWalk.next()) { - String filePath = treeWalk.getPathString(); - String fileName = treeWalk.getNameString(); + while (treeWalk.next()) { + String filePath = treeWalk.getPathString(); + String fileName = treeWalk.getNameString(); - if (isRelevantFile(fileName)) { - byte[] fileContent = repository.open(treeWalk.getObjectId(0)).getBytes(); - String fileContentStr = new String(fileContent, StandardCharsets.UTF_8); + if (isRelevantFile(fileName)) { + byte[] fileContent = repository.open(treeWalk.getObjectId(0)).getBytes(); + String fileContentStr = new String(fileContent, StandardCharsets.UTF_8); - Map metadata = extractMetadata(fileName, fileContentStr); - metadata.put("filePath", filePath); - metadata.put("fileName", fileName); + Map metadata = extractMetadata(fileName, fileContentStr); + metadata.put("filePath", filePath); + metadata.put("fileName", fileName); - Document doc = new Document(fileContentStr); - doc.getMetadata().putAll(metadata); + Document doc = new Document(fileContentStr); + doc.getMetadata().putAll(metadata); - doc.getMetadata().putAll(ingestionInfo.getMetadata()); - documents.add(doc); - } - } + doc.getMetadata().putAll(ingestionInfo.getMetadata()); + documents.add(doc); + } + } - TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(), - ingestionInfo.getMinChunkSize(), ingestionInfo.getMinChunkSizeToEmbed(), - ingestionInfo.getMaxNumberOfChunks(), false); + TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(), + ingestionInfo.getMinChunkSize(), ingestionInfo.getMinChunkSizeToEmbed(), + ingestionInfo.getMaxNumberOfChunks(), false); - List splitDocuments = splitter.split(documents); - logger.info("Number of documents: " + splitDocuments.size()); - vectorStore.add(splitDocuments); - logger.info("Documents embedded"); - } + List splitDocuments = splitter.split(documents); + logger.info("Number of documents: " + splitDocuments.size()); + vectorStore.add(splitDocuments); + logger.info("Documents embedded"); + } - ksGitInfo.setIngestionStatus("INGESTED"); - ksGitInfo.setIngestionDate(new Date()); - ksGitInfoRepository.save(ksGitInfo); - } catch (Exception e) { - ksGitInfo.setIngestionStatus("ERROR"); - ksGitInfoRepository.save(ksGitInfo); - logger.error("Error during ingestion", e); - } - } - - public IngestionOutput checkIngestionStatus(String repoName) { - Optional optionalDocument = ksGitInfoRepository.findByRepoName(repoName); - IngestionOutput ingestionOutput = new IngestionOutput(); - if (optionalDocument.isPresent()) { - KSGitInfo ksGitInfo = optionalDocument.get(); - ingestionOutput.setStatus(ksGitInfo.getIngestionStatus()); - ingestionOutput.setMessage("Status Retrieved"); - if ("INGESTED".equals(ksGitInfo.getIngestionStatus())) { - ingestionOutput.getIngestedDocumentId().add(ksGitInfo.getId()); - } - } else { - ingestionOutput.setStatus("ERROR"); - ingestionOutput.setMessage("Document Not Found"); - } - return ingestionOutput; - } + ksGitInfo.setIngestionStatus("INGESTED"); + ksGitInfo.setIngestionDate(new Date()); + ksGitInfoRepository.save(ksGitInfo); + } catch (Exception e) { + ksGitInfo.setIngestionStatus("ERROR"); + ksGitInfoRepository.save(ksGitInfo); + logger.error("Error during ingestion", e); + } + } + + public CompletableFuture ReIngestGitRepository(String repo) throws GitAPIException, IOException { + //String repoPath = "C:\\Users\\s.shamrao.shinde\\GenAIStorage\\" + repo + "\\"; + //String repoPath = ksGitInfo.getRepoPath() + repo + "\\"; + //logger.info("Repository path : " + repoPath); + + Optional optionalDocument = ksGitInfoRepository.findByRepoName(repo); + if (optionalDocument.isPresent()) { + KSGitInfo ksGitInfo = optionalDocument.get(); + if ("INGESTED".equals(ksGitInfo.getIngestionStatus())) { + reIngestRepo(repo, ksGitInfo); + } else { + logger.info("OOPS: Document is already Injected"); + } + } else { + logger.info("OOPS: Document Not found"); + } + return CompletableFuture.completedFuture(null); + } + + private void reIngestRepo(String repo, KSGitInfo ksGitInfo) throws IOException, GitAPIException { - private boolean isRelevantFile(String fileName) { - // Add more relevant file extensions as needed - boolean response = false; - if (fileName.endsWith(".java")) { - response = true; - } else if (fileName.endsWith(".py")) { - response = true; - } else if (fileName.endsWith(".js")) { - response = true; - } else if (fileName.endsWith(".vue")) { - response = true; - } else if (fileName.endsWith(".groovy") || fileName.endsWith(".jenkins") || fileName.endsWith(".jenkinsfile")) { - response = true; - } - return response; - } + HashMap modifiedFiles = ksGitInfo.getGitModifiedFiles(); - private Map extractMetadata(String fileName, String fileContent) { - Map metadata = new HashMap<>(); - if (fileName.endsWith(".java")) { - metadata.putAll(extractJavaMetadata(fileContent)); - } else if (fileName.endsWith(".py")) { - metadata.putAll(extractPythonMetadata(fileContent)); - } else if (fileName.endsWith(".js")) { - metadata.putAll(extractJavaScriptMetadata(fileContent)); - } else if (fileName.endsWith(".vue")) { - metadata.putAll(extractVueMetadata(fileContent)); - } else if (fileName.endsWith(".groovy") || fileName.endsWith(".jenkins") || fileName.endsWith(".jenkinsfile")) { - metadata.putAll(extractGroovyMetadata(fileContent)); - } - return metadata; - } + List filePathsToDelete = new ArrayList<>(); + List filePathsToEmbed = new ArrayList<>(); - /* - * private Map extractJavaMetadata(String fileContent) { - * Map metadata = new HashMap<>(); // Simple regex to find class - * names (this is a basic implementation and might // miss some cases) Pattern - * classPattern = Pattern.compile("class\\s+(\\w+)"); Matcher classMatcher = - * classPattern.matcher(fileContent); List classNames = new - * ArrayList<>(); while (classMatcher.find()) { - * classNames.add(classMatcher.group(1)); } metadata.put("classNames", - * String.join(",", classNames)); return metadata; } - */ + for (Map.Entry entry : modifiedFiles.entrySet()) { + switch (entry.getKey()) { + case "MODIFY": + String[] modifiedFileList = entry.getValue().split(","); + for (String modifiedFile : modifiedFileList) { + filePathsToDelete.add(modifiedFile); + filePathsToEmbed.add(modifiedFile); + } + break; + case "ADD": + String[] addedFileList = entry.getValue().split(","); + for (String addFile : addedFileList) { + filePathsToEmbed.add(addFile); + } + break; + case "DELETE": + String[] deletedFileList = entry.getValue().split(","); + for (String deletedFile : deletedFileList) { + filePathsToDelete.add(deletedFile); + } + break; + default: + break; + } + for (String fileToDelete : filePathsToDelete) { + Optional optionalDocument = vectorStoreRepository.findByFilePath(fileToDelete); + if (optionalDocument.isPresent()) { + String vectorStoreId = optionalDocument.get().getId(); + vectorStoreRepository.deleteById(vectorStoreId); + } + } - private Map extractJavaMetadata(String fileContent) { - Map metadata = new HashMap<>(); + } + String repoPath = localRepoPath +"/"+ repo + "/"; + //String repoPath = "C:\\repos\\olympus_ai\\gitClone" + "\\" + repo + "\\"; //need to modify before deploy + logger.info("Repository path : " + repoPath); - // Extract package name - Pattern packagePattern = Pattern.compile("package\\s+([\\w\\.]+);"); - Matcher packageMatcher = packagePattern.matcher(fileContent); - if (packageMatcher.find()) { - metadata.put("packageName", packageMatcher.group(1)); - } + try (Git git = Git.open(new File(repoPath))) { + ksGitInfo.setIngestionStatus("IN PROGRESS"); + KSGitIngestionInfo ingestionInfo = ksGitInfo.getKsGitIngestionInfo(); + logger.info("Metadata : " + ingestionInfo.getMetadata()); + ksGitInfoRepository.save(ksGitInfo); - // Extract class names - Pattern classPattern = Pattern.compile("\\bclass\\s+(\\w+)"); - Matcher classMatcher = classPattern.matcher(fileContent); - List classNames = new ArrayList<>(); - while (classMatcher.find()) { - classNames.add(classMatcher.group(1)); - } - metadata.put("classNames", String.join(",", classNames)); + Repository repository = git.getRepository(); + RevCommit latestCommit = git.log().setMaxCount(1).call().iterator().next(); - // Extract method names - Pattern methodPattern = Pattern.compile("\\b(?:public|protected|private|static|\\s)\\s*\\w+\\s+(\\w+)\\s*\\("); - Matcher methodMatcher = methodPattern.matcher(fileContent); - List methodNames = new ArrayList<>(); - while (methodMatcher.find()) { - methodNames.add(methodMatcher.group(1)); - } - metadata.put("methodNames", String.join(",", methodNames)); + try (TreeWalk treeWalk = new TreeWalk(repository)) { + treeWalk.addTree(latestCommit.getTree()); + treeWalk.setRecursive(true); + List documents = new ArrayList<>(); + for (String filePath : filePathsToEmbed) { - // Extract import statements - Pattern importPattern = Pattern.compile("import\\s+([\\w\\.\\*]+);"); - Matcher importMatcher = importPattern.matcher(fileContent); - List importStatements = new ArrayList<>(); - while (importMatcher.find()) { - importStatements.add(importMatcher.group(1)); - } - metadata.put("importStatements", String.join(",", importStatements)); + String[] parts = filePath.split("/"); + String fileName = parts[parts.length - 1]; + if (isRelevantFile(fileName)) { + boolean fileFound = false; + while (treeWalk.next()) { + if (treeWalk.getPathString().equals(filePath)) { + fileFound = true; + byte[] fileContent = repository.open(treeWalk.getObjectId(0)).getBytes(); + String fileContentStr = new String(fileContent, StandardCharsets.UTF_8); - return metadata; - } + Map metadata = extractMetadata(fileName, fileContentStr); + metadata.put("filePath", filePath); + metadata.put("fileName", fileName); - private Map extractPythonMetadata(String fileContent) { - Map metadata = new HashMap<>(); + Document doc = new Document(fileContentStr); + doc.getMetadata().putAll(metadata); - // Extract class names - Pattern classPattern = Pattern.compile("\\bclass\\s+(\\w+)"); - Matcher classMatcher = classPattern.matcher(fileContent); - List classNames = new ArrayList<>(); - while (classMatcher.find()) { - classNames.add(classMatcher.group(1)); - } - metadata.put("classNames", String.join(",", classNames)); + doc.getMetadata().putAll(ingestionInfo.getMetadata()); + documents.add(doc); + break; + } + } + if (!fileFound) { + logger.warn("File not found in repository: " + filePath); + } + // Reset TreeWalk to start from the beginning for the next file + treeWalk.reset(); + treeWalk.addTree(latestCommit.getTree()); + treeWalk.setRecursive(true); + } + } - // Extract function names - Pattern functionPattern = Pattern.compile("\\bdef\\s+(\\w+)\\s*\\("); - Matcher functionMatcher = functionPattern.matcher(fileContent); - List functionNames = new ArrayList<>(); - while (functionMatcher.find()) { - functionNames.add(functionMatcher.group(1)); - } - metadata.put("functionNames", String.join(",", functionNames)); - // Extract import statements - Pattern importPattern = Pattern.compile("\\bimport\\s+([\\w\\.]+)|\\bfrom\\s+([\\w\\.]+)\\s+import"); - Matcher importMatcher = importPattern.matcher(fileContent); - List importStatements = new ArrayList<>(); - while (importMatcher.find()) { - if (importMatcher.group(1) != null) { - importStatements.add(importMatcher.group(1)); - } else if (importMatcher.group(2) != null) { - importStatements.add(importMatcher.group(2)); - } - } - metadata.put("importStatements", String.join(",", importStatements)); + TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(), + ingestionInfo.getMinChunkSize(), ingestionInfo.getMinChunkSizeToEmbed(), + ingestionInfo.getMaxNumberOfChunks(), false); - return metadata; - } + List splitDocuments = splitter.split(documents); + logger.info("Number of documents: " + splitDocuments.size()); + vectorStore.add(splitDocuments); + logger.info("Documents embedded"); + } - private Map extractJavaScriptMetadata(String fileContent) { - Map metadata = new HashMap<>(); - // Extract function names - Pattern functionPattern = Pattern.compile("\\bfunction\\s+(\\w+)\\s*\\("); - Matcher functionMatcher = functionPattern.matcher(fileContent); - List functionNames = new ArrayList<>(); - while (functionMatcher.find()) { - functionNames.add(functionMatcher.group(1)); - } - metadata.put("functionNames", String.join(",", functionNames)); + ksGitInfo.setIngestionStatus("INGESTED"); + ksGitInfo.setIngestionDate(new Date()); + ksGitInfoRepository.save(ksGitInfo); - // Extract class names - Pattern classPattern = Pattern.compile("\\bclass\\s+(\\w+)"); - Matcher classMatcher = classPattern.matcher(fileContent); - List classNames = new ArrayList<>(); - while (classMatcher.find()) { - classNames.add(classMatcher.group(1)); - } - metadata.put("classNames", String.join(",", classNames)); + } catch (Exception e) { + ksGitInfo.setIngestionStatus("ERROR"); + ksGitInfoRepository.save(ksGitInfo); + logger.error("Error during ingestion", e); + } + } - // Extract import statements - Pattern importPattern = Pattern.compile("\\bimport\\s+[^;]+\\s+from\\s+['\"]([\\w\\.\\/-]+)['\"]"); - Matcher importMatcher = importPattern.matcher(fileContent); - List importStatements = new ArrayList<>(); - while (importMatcher.find()) { - importStatements.add(importMatcher.group(1)); - } - metadata.put("importStatements", String.join(",", importStatements)); + public IngestionOutput checkIngestionStatus(String repoName) { + Optional optionalDocument = ksGitInfoRepository.findByRepoName(repoName); + IngestionOutput ingestionOutput = new IngestionOutput(); + if (optionalDocument.isPresent()) { + KSGitInfo ksGitInfo = optionalDocument.get(); + ingestionOutput.setStatus(ksGitInfo.getIngestionStatus()); + ingestionOutput.setMessage("Status Retrieved"); + if ("INGESTED".equals(ksGitInfo.getIngestionStatus())) { + ingestionOutput.getIngestedDocumentId().add(ksGitInfo.getId()); + } + } else { + ingestionOutput.setStatus("ERROR"); + ingestionOutput.setMessage("Repository Not Found"); + } + return ingestionOutput; + } - return metadata; - } - private Map extractVueMetadata(String fileContent) { - Map metadata = new HashMap<>(); + private boolean isRelevantFile(String fileName) { + // Add more relevant file extensions as needed + boolean response = false; + if (fileName.endsWith(".java")) { + response = true; + } else if (fileName.endsWith(".py")) { + response = true; + } else if (fileName.endsWith(".js")) { + response = true; + } else if (fileName.endsWith(".vue")) { + response = true; + } else if (fileName.endsWith(".groovy") || fileName.endsWith(".jenkins") || fileName.endsWith(".jenkinsfile")) { + response = true; + } + return response; + } - // Extract component name - Pattern namePattern = Pattern.compile("