From b90acc55e0a3b5946dd4edfd558dec722d7d30f2 Mon Sep 17 00:00:00 2001 From: Mishra Date: Thu, 8 Aug 2024 16:23:32 +0530 Subject: [PATCH] Adding code repository full BE implementation --- .../com/olympus/apollo/ApolloApplication.java | 2 + .../controllers/FeApi/KSGitController.java | 78 +++ .../apollo/controllers/TestController.java | 103 ++-- .../com/olympus/apollo/dto/KSGitInfoDTO.java | 22 + .../olympus/apollo/dto/KSGitUploadDTO.java | 19 + .../com/olympus/apollo/models/KSGitInfo.java | 26 + .../apollo/models/KSGitIngestionInfo.java | 21 + .../repository/KSGitInfoRepository.java | 15 + .../KSGitIngestionInfoRepository.java | 10 + .../services/FileSystemStorageService.java | 6 + .../services/GitRepositoryIngestor.java | 457 ++++++++++++++---- .../apollo/services/KSGitInfoService.java | 28 ++ .../apollo/services/StorageProperties.java | 4 +- src/main/resources/application.properties | 4 +- 14 files changed, 653 insertions(+), 142 deletions(-) create mode 100644 src/main/java/com/olympus/apollo/controllers/FeApi/KSGitController.java create mode 100644 src/main/java/com/olympus/apollo/dto/KSGitInfoDTO.java create mode 100644 src/main/java/com/olympus/apollo/dto/KSGitUploadDTO.java create mode 100644 src/main/java/com/olympus/apollo/models/KSGitInfo.java create mode 100644 src/main/java/com/olympus/apollo/models/KSGitIngestionInfo.java create mode 100644 src/main/java/com/olympus/apollo/repository/KSGitInfoRepository.java create mode 100644 src/main/java/com/olympus/apollo/repository/KSGitIngestionInfoRepository.java create mode 100644 src/main/java/com/olympus/apollo/services/KSGitInfoService.java diff --git a/src/main/java/com/olympus/apollo/ApolloApplication.java b/src/main/java/com/olympus/apollo/ApolloApplication.java index 07ce42c..f0b54c5 100644 --- a/src/main/java/com/olympus/apollo/ApolloApplication.java +++ b/src/main/java/com/olympus/apollo/ApolloApplication.java @@ -3,11 +3,13 @@ package com.olympus.apollo; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.scheduling.annotation.EnableAsync; import com.olympus.apollo.services.StorageProperties; @SpringBootApplication @EnableConfigurationProperties(StorageProperties.class) +@EnableAsync public class ApolloApplication { public static void main(String[] args) { diff --git a/src/main/java/com/olympus/apollo/controllers/FeApi/KSGitController.java b/src/main/java/com/olympus/apollo/controllers/FeApi/KSGitController.java new file mode 100644 index 0000000..43242dc --- /dev/null +++ b/src/main/java/com/olympus/apollo/controllers/FeApi/KSGitController.java @@ -0,0 +1,78 @@ +package com.olympus.apollo.controllers.FeApi; + +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashMap; +import java.util.List; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.CrossOrigin; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import com.olympus.apollo.dto.KSGitInfoDTO; +import com.olympus.apollo.dto.KSGitUploadDTO; +import com.olympus.apollo.models.KSGitInfo; +import com.olympus.apollo.models.KSGitIngestionInfo; +import com.olympus.apollo.repository.KSGitInfoRepository; +import com.olympus.apollo.repository.KSGitIngestionInfoRepository; +import com.olympus.apollo.services.KSGitInfoService; + +import io.swagger.v3.oas.annotations.parameters.RequestBody; + +@CrossOrigin(origins = "http://localhost:5173") +@RestController +@RequestMapping("/fe-api/ks_git_repos") +public class KSGitController { + + @Autowired + private KSGitInfoRepository ksGitInfoRepository; + @Autowired + private KSGitIngestionInfoRepository ksGitIngestionInfoRepository; + + @GetMapping("") + public List listGitInfo() { + List result = (List) ksGitInfoRepository.findAll(); + return result; + } + + @PostMapping("/uploadRepo") + public ResponseEntity handleGitUpload(@RequestBody KSGitUploadDTO ksGitUploadDTO) { + + KSGitInfo ksGitInfo = new KSGitInfo(); + ksGitInfo.setRepoName(ksGitUploadDTO.getRepoName()); + ksGitInfo.setBranch(ksGitUploadDTO.getBranch()); + ksGitInfo.setCommitId(ksGitUploadDTO.getCommitId()); + ksGitInfo.setRepoPath(ksGitUploadDTO.getRepoPath()); + ksGitInfo.setIngestionStatus("NEW"); + ksGitInfo.setIngestionDate(new Date()); + ksGitInfo.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date())); + + KSGitIngestionInfo ksGitIngestionInfo = new KSGitIngestionInfo(); + HashMap metadata = new HashMap<>(); + + metadata.put("KsApplicationName", ksGitUploadDTO.getRepoName()); + metadata.put("KsDoctype", "gitrepository"); + metadata.put("KsDocSource", "gitlab"); + metadata.put("KsFileSource", ksGitUploadDTO.getRepoName()); + + metadata.put("KsBranch", ksGitUploadDTO.getBranch()); + metadata.put("KsRepoName", ksGitUploadDTO.getRepoName()); + + ksGitIngestionInfo.setMetadata(metadata); + ksGitIngestionInfo.setMinChunkSizeToEmbed(ksGitUploadDTO.getMinChunkSizeToEmbed()); + ksGitIngestionInfo.setMaxNumberOfChunks(ksGitUploadDTO.getMaxNumberOfChunks()); + ksGitIngestionInfo.setMinChunkSize(ksGitUploadDTO.getMinChunkSize()); + ksGitIngestionInfo.setDefaultChunkSize(ksGitUploadDTO.getDefaultChunkSize()); + + ksGitIngestionInfoRepository.save(ksGitIngestionInfo); + ksGitInfo.setKsGitIngestionInfo(ksGitIngestionInfo); + ksGitInfoRepository.save(ksGitInfo); + + return ResponseEntity.ok("Upload successful"); + } + +} diff --git a/src/main/java/com/olympus/apollo/controllers/TestController.java b/src/main/java/com/olympus/apollo/controllers/TestController.java index 761aabd..d9d015b 100644 --- a/src/main/java/com/olympus/apollo/controllers/TestController.java +++ b/src/main/java/com/olympus/apollo/controllers/TestController.java @@ -1,14 +1,19 @@ package com.olympus.apollo.controllers; -import java.util.HashMap; import java.util.List; -import com.olympus.apollo.dto.IngestionOutput; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.*; +import org.springframework.web.bind.annotation.CrossOrigin; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; -import com.olympus.apollo.models.KSIngestionInfo; +import com.olympus.apollo.dto.IngestionOutput; import com.olympus.apollo.services.GitRepositoryIngestor; import com.olympus.apollo.services.KSIngestor; @@ -16,61 +21,63 @@ import com.olympus.apollo.services.KSIngestor; @RestController public class TestController { - @Autowired - KSIngestor ksIngestor; + @Autowired + private KSIngestor ksIngestor; - @Autowired - GitRepositoryIngestor gitRepositoryIngestor; + @Autowired + GitRepositoryIngestor gitRepositoryIngestor; - - @GetMapping("test/ingestion_loop") - public IngestionOutput testIngestionLoop() { - return ksIngestor.ingestLoop(); - } + private static final Logger logger = LoggerFactory.getLogger(TestController.class); - @GetMapping("test/ingest_document/{id}") - public IngestionOutput ingestDocumentById(@PathVariable String id) { - return ksIngestor.ingestDocumentById(id); - } + @GetMapping("test/ingestion_loop") + public IngestionOutput testIngestionLoop() { + return ksIngestor.ingestLoop(); + } - @GetMapping("test/query_vector") - public List testSimilaritySearch(@RequestParam String query, @RequestParam String type) { - return ksIngestor.testSimilaritySearch(query,type); - } + @GetMapping("test/ingest_document/{id}") + public IngestionOutput ingestDocumentById(@PathVariable String id) { + return ksIngestor.ingestDocumentById(id); + } - @GetMapping("test/delete") - public String deleteAllFromVectore(@RequestParam String query) { - ksIngestor.deleteAll("3-automated-test-framework---atf.md"); - return "Deleted"; - } + @GetMapping("test/query_vector") + public List testSimilaritySearch(@RequestParam String query, @RequestParam String type) { + return ksIngestor.testSimilaritySearch(query, type); + } - @GetMapping("test/ingest_repo") - public String ingestRepo() { - try { + @GetMapping("test/delete") + public String deleteAllFromVectore(@RequestParam String query) { + ksIngestor.deleteAll("3-automated-test-framework---atf.md"); + return "Deleted"; + } - KSIngestionInfo ksIngestionInfo = new KSIngestionInfo(); - + @GetMapping("test/ingest_repo/{repoName}") + public ResponseEntity ingestRepo(@PathVariable String repoName) { + try { + gitRepositoryIngestor.ingestGitRepository(repoName); + return ResponseEntity.ok("Ingestion Started"); + } catch (Exception e) { - HashMap metadata = new HashMap<>(); - - metadata.put("KsApplicatioName","doo"); - metadata.put("KsDoctype","sourcecode"); - metadata.put("KsDoSource","GIT"); - ksIngestionInfo.setMetadata(metadata); - ksIngestionInfo.setDefaultChunkSize(6000); - ksIngestionInfo.setMinChunkSize(200); - ksIngestionInfo.setMaxNumberOfChunks(10000); - ksIngestionInfo.setMinChunkSizeToEmbed(100); + logger.error("Error during ingestion start", e); + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR) + .body("Error starting ingestion: " + e.getMessage()); + } + } - String repoPath = "C:\\Users\\andrea.terzani\\dev\\DOO2_CLOUD"; - gitRepositoryIngestor.ingestGitRepository(repoPath, ksIngestionInfo); + @GetMapping("test/check_ingestion_status/{repoName}") + public ResponseEntity checkIngestionStatus(@PathVariable String repoName) { + try { + IngestionOutput ingestionOutput = gitRepositoryIngestor.checkIngestionStatus(repoName); + return ResponseEntity.ok(ingestionOutput); + } catch (Exception e) { + logger.error("Error checking ingestion status", e); - return "Ingested"; - } catch (Exception e) { - return "Error"; - } - } + IngestionOutput errorOutput = new IngestionOutput(); + errorOutput.setStatus("ERROR"); + errorOutput.setMessage("Error checking ingestion status: " + e.getMessage()); + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorOutput); + } + } } diff --git a/src/main/java/com/olympus/apollo/dto/KSGitInfoDTO.java b/src/main/java/com/olympus/apollo/dto/KSGitInfoDTO.java new file mode 100644 index 0000000..e224f7a --- /dev/null +++ b/src/main/java/com/olympus/apollo/dto/KSGitInfoDTO.java @@ -0,0 +1,22 @@ +package com.olympus.apollo.dto; + +import lombok.Getter; +import lombok.Setter; + +import java.util.Date; + +@Getter @Setter +public class KSGitInfoDTO { + private String repoName; + private String branch; + private String ingestionStatus; + private Date ingestionDate; + + public KSGitInfoDTO(String repoName, String branch, String ingestionStatus, Date ingestionDate) { + this.repoName = repoName; + this.branch = branch; + this.ingestionStatus = ingestionStatus; + this.ingestionDate = ingestionDate; + + } +} diff --git a/src/main/java/com/olympus/apollo/dto/KSGitUploadDTO.java b/src/main/java/com/olympus/apollo/dto/KSGitUploadDTO.java new file mode 100644 index 0000000..e02336d --- /dev/null +++ b/src/main/java/com/olympus/apollo/dto/KSGitUploadDTO.java @@ -0,0 +1,19 @@ +package com.olympus.apollo.dto; + +import java.util.HashMap; + +import lombok.Getter; +import lombok.Setter; + +@Getter @Setter +public class KSGitUploadDTO { + private String repoName; + private String branch; + private String commitId; + private String repoPath; + private HashMap metadata; + private int minChunkSizeToEmbed; + private int maxNumberOfChunks; + private int minChunkSize; + private int defaultChunkSize; +} diff --git a/src/main/java/com/olympus/apollo/models/KSGitInfo.java b/src/main/java/com/olympus/apollo/models/KSGitInfo.java new file mode 100644 index 0000000..b027de4 --- /dev/null +++ b/src/main/java/com/olympus/apollo/models/KSGitInfo.java @@ -0,0 +1,26 @@ +package com.olympus.apollo.models; + +import java.util.Date; + +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; + +import lombok.Getter; +import lombok.Setter; + +@Document(collection = "ksgit_info") +@Getter +@Setter +public class KSGitInfo { + @Id + private String id; + + private String repoName; + private String branch; + private String commitId; + private String repoPath; + private KSGitIngestionInfo ksGitIngestionInfo; + private String ingestionStatus; + private Date ingestionDate; + private String ingestionDateFormat; +} diff --git a/src/main/java/com/olympus/apollo/models/KSGitIngestionInfo.java b/src/main/java/com/olympus/apollo/models/KSGitIngestionInfo.java new file mode 100644 index 0000000..0a1dfee --- /dev/null +++ b/src/main/java/com/olympus/apollo/models/KSGitIngestionInfo.java @@ -0,0 +1,21 @@ +package com.olympus.apollo.models; + +import java.util.HashMap; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; +import lombok.Getter; +import lombok.Setter; + +@Document(collection = "ksgit_ingestioninfo") +@Getter @Setter +public class KSGitIngestionInfo { + @Id + private String id; + + private String type; + private HashMap metadata; + private int minChunkSizeToEmbed; + private int maxNumberOfChunks; + private int minChunkSize; + private int defaultChunkSize; +} diff --git a/src/main/java/com/olympus/apollo/repository/KSGitInfoRepository.java b/src/main/java/com/olympus/apollo/repository/KSGitInfoRepository.java new file mode 100644 index 0000000..f40b017 --- /dev/null +++ b/src/main/java/com/olympus/apollo/repository/KSGitInfoRepository.java @@ -0,0 +1,15 @@ +package com.olympus.apollo.repository; + +import java.util.Optional; + +import org.springframework.data.mongodb.repository.MongoRepository; +import org.springframework.data.rest.core.annotation.RepositoryRestResource; +import org.springframework.stereotype.Repository; +import org.springframework.web.bind.annotation.CrossOrigin; + +import com.olympus.apollo.models.KSGitInfo; + +@Repository +public interface KSGitInfoRepository extends MongoRepository { + Optional findByRepoName(String repoName); +} diff --git a/src/main/java/com/olympus/apollo/repository/KSGitIngestionInfoRepository.java b/src/main/java/com/olympus/apollo/repository/KSGitIngestionInfoRepository.java new file mode 100644 index 0000000..705242d --- /dev/null +++ b/src/main/java/com/olympus/apollo/repository/KSGitIngestionInfoRepository.java @@ -0,0 +1,10 @@ +package com.olympus.apollo.repository; + +import org.springframework.data.mongodb.repository.MongoRepository; +import org.springframework.stereotype.Repository; + +import com.olympus.apollo.models.KSGitIngestionInfo; + +@Repository +public interface KSGitIngestionInfoRepository extends MongoRepository { +} diff --git a/src/main/java/com/olympus/apollo/services/FileSystemStorageService.java b/src/main/java/com/olympus/apollo/services/FileSystemStorageService.java index 0e90232..17c776c 100644 --- a/src/main/java/com/olympus/apollo/services/FileSystemStorageService.java +++ b/src/main/java/com/olympus/apollo/services/FileSystemStorageService.java @@ -7,6 +7,11 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.springframework.beans.factory.annotation.Autowired; @@ -110,5 +115,6 @@ public class FileSystemStorageService implements StorageService { throw new StorageException("Could not initialize storage", e); } } + } diff --git a/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java b/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java index 36920f8..fd7ec0e 100644 --- a/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java +++ b/src/main/java/com/olympus/apollo/services/GitRepositoryIngestor.java @@ -1,121 +1,398 @@ package com.olympus.apollo.services; +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import org.eclipse.jgit.api.Git; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.treewalk.TreeWalk; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.ai.document.Document; import org.springframework.ai.transformer.splitter.TokenTextSplitter; import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; -import com.olympus.apollo.models.KSIngestionInfo; - -import java.io.File; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import com.olympus.apollo.dto.IngestionOutput; +import com.olympus.apollo.models.KSGitInfo; +import com.olympus.apollo.models.KSGitIngestionInfo; +import com.olympus.apollo.repository.KSGitInfoRepository; @Service public class GitRepositoryIngestor { - private final VectorStore vectorStore; + private final VectorStore vectorStore; - public GitRepositoryIngestor( VectorStore vectorStore) { - this.vectorStore = vectorStore; - } + @Autowired + private KSGitInfoRepository ksGitInfoRepository; - public void ingestGitRepository(String repoPath,KSIngestionInfo ingestionInfo) throws Exception { - try (Git git = Git.open(new File(repoPath))) { - Repository repository = git.getRepository(); - RevCommit latestCommit = git.log().setMaxCount(1).call().iterator().next(); + public GitRepositoryIngestor(VectorStore vectorStore) { + this.vectorStore = vectorStore; + } - try (TreeWalk treeWalk = new TreeWalk(repository)) { - treeWalk.addTree(latestCommit.getTree()); - treeWalk.setRecursive(true); + Logger logger = LoggerFactory.getLogger(GitRepositoryIngestor.class); - List documents = new ArrayList<>(); + @Async + public CompletableFuture ingestGitRepository(String repo) { + String repoPath = "C:\\Users\\vinayak.c.mishra\\dev\\olympus\\upload-dir\\" + repo + "\\"; + logger.info("Repository path : " + repoPath); - while (treeWalk.next()) { - String filePath = treeWalk.getPathString(); - String fileName = treeWalk.getNameString(); - - if (isRelevantFile(fileName)) { - byte[] fileContent = repository.open(treeWalk.getObjectId(0)).getBytes(); - String fileContentStr = new String(fileContent, StandardCharsets.UTF_8); - - Map metadata = extractMetadata(fileName, fileContentStr); - metadata.put("filePath", filePath); - metadata.put("fileName", fileName); - - Document doc = new Document(fileContentStr); - doc.getMetadata().putAll(metadata); - - doc.getMetadata().putAll(ingestionInfo.getMetadata()); - documents.add(doc); - } - } - - - TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(), - ingestionInfo.getMinChunkSize(), - ingestionInfo.getMinChunkSizeToEmbed(), - ingestionInfo.getMaxNumberOfChunks(), - false); - - List splitDocuments = splitter.split(documents); - - vectorStore.add(splitDocuments); + Optional optionalDocument = ksGitInfoRepository.findByRepoName(repo); + if (optionalDocument.isPresent()) { + KSGitInfo ksGitInfo = optionalDocument.get(); + if ("NEW".equals(ksGitInfo.getIngestionStatus())) { + ingestRepo(repoPath, ksGitInfo); + } else { + logger.info("OOPS: Document is already Injected"); } + } else { + logger.info("OOPS: Document Not found"); } + return CompletableFuture.completedFuture(null); } - private boolean isRelevantFile(String fileName) { - // Add more relevant file extensions as needed - return fileName.endsWith(".java"); - } + private void ingestRepo(String repoPath, KSGitInfo ksGitInfo) { + try (Git git = Git.open(new File(repoPath))) { + ksGitInfo.setIngestionStatus("IN PROGRESS"); - private Map extractMetadata(String fileName, String fileContent) { - Map metadata = new HashMap<>(); + KSGitIngestionInfo ingestionInfo = ksGitInfo.getKsGitIngestionInfo(); + logger.info("Metadata : " + ingestionInfo.getMetadata()); + ksGitInfoRepository.save(ksGitInfo); - if (fileName.endsWith(".java")) { - metadata.putAll(extractJavaMetadata(fileContent)); - } else if (fileName.endsWith(".py")) { - metadata.putAll(extractPythonMetadata(fileContent)); - } else if (fileName.endsWith(".js")) { - metadata.putAll(extractJavaScriptMetadata(fileContent)); - } + Repository repository = git.getRepository(); + RevCommit latestCommit = git.log().setMaxCount(1).call().iterator().next(); - return metadata; - } + try (TreeWalk treeWalk = new TreeWalk(repository)) { + treeWalk.addTree(latestCommit.getTree()); + treeWalk.setRecursive(true); - private Map extractJavaMetadata(String fileContent) { - Map metadata = new HashMap<>(); - // Simple regex to find class names (this is a basic implementation and might miss some cases) - Pattern classPattern = Pattern.compile("class\\s+(\\w+)"); - Matcher classMatcher = classPattern.matcher(fileContent); - List classNames = new ArrayList<>(); - while (classMatcher.find()) { - classNames.add(classMatcher.group(1)); - } - metadata.put("classNames", String.join(",", classNames)); - return metadata; - } + List documents = new ArrayList<>(); - private Map extractPythonMetadata(String fileContent) { - // Implement Python-specific metadata extraction - // This is a placeholder and should be implemented based on your needs - return new HashMap<>(); - } + while (treeWalk.next()) { + String filePath = treeWalk.getPathString(); + String fileName = treeWalk.getNameString(); + + if (isRelevantFile(fileName)) { + byte[] fileContent = repository.open(treeWalk.getObjectId(0)).getBytes(); + String fileContentStr = new String(fileContent, StandardCharsets.UTF_8); + + Map metadata = extractMetadata(fileName, fileContentStr); + metadata.put("filePath", filePath); + metadata.put("fileName", fileName); + + Document doc = new Document(fileContentStr); + doc.getMetadata().putAll(metadata); + + doc.getMetadata().putAll(ingestionInfo.getMetadata()); + documents.add(doc); + } + } + + TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(), + ingestionInfo.getMinChunkSize(), ingestionInfo.getMinChunkSizeToEmbed(), + ingestionInfo.getMaxNumberOfChunks(), false); + + List splitDocuments = splitter.split(documents); + logger.info("Number of documents: " + splitDocuments.size()); + vectorStore.add(splitDocuments); + logger.info("Documents embedded"); + } + + ksGitInfo.setIngestionStatus("INGESTED"); + ksGitInfo.setIngestionDate(new Date()); + ksGitInfoRepository.save(ksGitInfo); + } catch (Exception e) { + ksGitInfo.setIngestionStatus("ERROR"); + ksGitInfoRepository.save(ksGitInfo); + logger.error("Error during ingestion", e); + } + } + + public IngestionOutput checkIngestionStatus(String repoName) { + Optional optionalDocument = ksGitInfoRepository.findByRepoName(repoName); + IngestionOutput ingestionOutput = new IngestionOutput(); + if (optionalDocument.isPresent()) { + KSGitInfo ksGitInfo = optionalDocument.get(); + ingestionOutput.setStatus(ksGitInfo.getIngestionStatus()); + ingestionOutput.setMessage("Status Retrieved"); + if ("INGESTED".equals(ksGitInfo.getIngestionStatus())) { + ingestionOutput.getIngestedDocumentId().add(ksGitInfo.getId()); + } + } else { + ingestionOutput.setStatus("ERROR"); + ingestionOutput.setMessage("Document Not Found"); + } + return ingestionOutput; + } + + + private boolean isRelevantFile(String fileName) { + // Add more relevant file extensions as needed + boolean response = false; + if (fileName.endsWith(".java")) { + response = true; + } else if (fileName.endsWith(".py")) { + response = true; + } else if (fileName.endsWith(".js")) { + response = true; + } else if (fileName.endsWith(".vue")) { + response = true; + } else if (fileName.endsWith(".groovy") || fileName.endsWith(".jenkins") || fileName.endsWith(".jenkinsfile")) { + response = true; + } + return response; + } + + private Map extractMetadata(String fileName, String fileContent) { + Map metadata = new HashMap<>(); + + if (fileName.endsWith(".java")) { + metadata.putAll(extractJavaMetadata(fileContent)); + } else if (fileName.endsWith(".py")) { + metadata.putAll(extractPythonMetadata(fileContent)); + } else if (fileName.endsWith(".js")) { + metadata.putAll(extractJavaScriptMetadata(fileContent)); + } else if (fileName.endsWith(".vue")) { + metadata.putAll(extractVueMetadata(fileContent)); + } else if (fileName.endsWith(".groovy") || fileName.endsWith(".jenkins") || fileName.endsWith(".jenkinsfile")) { + metadata.putAll(extractGroovyMetadata(fileContent)); + } + return metadata; + } + + /* + * private Map extractJavaMetadata(String fileContent) { + * Map metadata = new HashMap<>(); // Simple regex to find class + * names (this is a basic implementation and might // miss some cases) Pattern + * classPattern = Pattern.compile("class\\s+(\\w+)"); Matcher classMatcher = + * classPattern.matcher(fileContent); List classNames = new + * ArrayList<>(); while (classMatcher.find()) { + * classNames.add(classMatcher.group(1)); } metadata.put("classNames", + * String.join(",", classNames)); return metadata; } + */ + + private Map extractJavaMetadata(String fileContent) { + Map metadata = new HashMap<>(); + + // Extract package name + Pattern packagePattern = Pattern.compile("package\\s+([\\w\\.]+);"); + Matcher packageMatcher = packagePattern.matcher(fileContent); + if (packageMatcher.find()) { + metadata.put("packageName", packageMatcher.group(1)); + } + + // Extract class names + Pattern classPattern = Pattern.compile("\\bclass\\s+(\\w+)"); + Matcher classMatcher = classPattern.matcher(fileContent); + List classNames = new ArrayList<>(); + while (classMatcher.find()) { + classNames.add(classMatcher.group(1)); + } + metadata.put("classNames", String.join(",", classNames)); + + // Extract method names + Pattern methodPattern = Pattern.compile("\\b(?:public|protected|private|static|\\s)\\s*\\w+\\s+(\\w+)\\s*\\("); + Matcher methodMatcher = methodPattern.matcher(fileContent); + List methodNames = new ArrayList<>(); + while (methodMatcher.find()) { + methodNames.add(methodMatcher.group(1)); + } + metadata.put("methodNames", String.join(",", methodNames)); + + // Extract import statements + Pattern importPattern = Pattern.compile("import\\s+([\\w\\.\\*]+);"); + Matcher importMatcher = importPattern.matcher(fileContent); + List importStatements = new ArrayList<>(); + while (importMatcher.find()) { + importStatements.add(importMatcher.group(1)); + } + metadata.put("importStatements", String.join(",", importStatements)); + + return metadata; + } + + private Map extractPythonMetadata(String fileContent) { + Map metadata = new HashMap<>(); + + // Extract class names + Pattern classPattern = Pattern.compile("\\bclass\\s+(\\w+)"); + Matcher classMatcher = classPattern.matcher(fileContent); + List classNames = new ArrayList<>(); + while (classMatcher.find()) { + classNames.add(classMatcher.group(1)); + } + metadata.put("classNames", String.join(",", classNames)); + + // Extract function names + Pattern functionPattern = Pattern.compile("\\bdef\\s+(\\w+)\\s*\\("); + Matcher functionMatcher = functionPattern.matcher(fileContent); + List functionNames = new ArrayList<>(); + while (functionMatcher.find()) { + functionNames.add(functionMatcher.group(1)); + } + metadata.put("functionNames", String.join(",", functionNames)); + + // Extract import statements + Pattern importPattern = Pattern.compile("\\bimport\\s+([\\w\\.]+)|\\bfrom\\s+([\\w\\.]+)\\s+import"); + Matcher importMatcher = importPattern.matcher(fileContent); + List importStatements = new ArrayList<>(); + while (importMatcher.find()) { + if (importMatcher.group(1) != null) { + importStatements.add(importMatcher.group(1)); + } else if (importMatcher.group(2) != null) { + importStatements.add(importMatcher.group(2)); + } + } + metadata.put("importStatements", String.join(",", importStatements)); + + return metadata; + } + + private Map extractJavaScriptMetadata(String fileContent) { + Map metadata = new HashMap<>(); + + // Extract function names + Pattern functionPattern = Pattern.compile("\\bfunction\\s+(\\w+)\\s*\\("); + Matcher functionMatcher = functionPattern.matcher(fileContent); + List functionNames = new ArrayList<>(); + while (functionMatcher.find()) { + functionNames.add(functionMatcher.group(1)); + } + metadata.put("functionNames", String.join(",", functionNames)); + + // Extract class names + Pattern classPattern = Pattern.compile("\\bclass\\s+(\\w+)"); + Matcher classMatcher = classPattern.matcher(fileContent); + List classNames = new ArrayList<>(); + while (classMatcher.find()) { + classNames.add(classMatcher.group(1)); + } + metadata.put("classNames", String.join(",", classNames)); + + // Extract import statements + Pattern importPattern = Pattern.compile("\\bimport\\s+[^;]+\\s+from\\s+['\"]([\\w\\.\\/-]+)['\"]"); + Matcher importMatcher = importPattern.matcher(fileContent); + List importStatements = new ArrayList<>(); + while (importMatcher.find()) { + importStatements.add(importMatcher.group(1)); + } + metadata.put("importStatements", String.join(",", importStatements)); + + return metadata; + } + + private Map extractVueMetadata(String fileContent) { + Map metadata = new HashMap<>(); + + // Extract component name + Pattern namePattern = Pattern.compile("