Aggiunta file di configurazione Tika
This commit is contained in:
2
pom.xml
2
pom.xml
@@ -103,6 +103,8 @@
|
|||||||
<artifactId>spring-ai-tika-document-reader</artifactId>
|
<artifactId>spring-ai-tika-document-reader</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springdoc</groupId>
|
<groupId>org.springdoc</groupId>
|
||||||
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
|
||||||
|
|||||||
@@ -2,24 +2,29 @@ package com.olympus.apollo.controllers;
|
|||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import com.olympus.apollo.feign.services.REModuleService;
|
|
||||||
import com.olympus.dto.ResultDTO;
|
|
||||||
import com.olympus.apollo.feign.services.ParserModuleService;
|
|
||||||
import com.olympus.apollo.services.GitService;
|
|
||||||
import com.olympus.dto.CommonParseRequest;
|
|
||||||
import com.olympus.dto.ApolloParseRequestDTO;
|
|
||||||
import com.olympus.feign.JavaParserModule;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.ai.document.Document;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.http.HttpStatus;
|
import org.springframework.http.HttpStatus;
|
||||||
import org.springframework.http.ResponseEntity;
|
import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.web.bind.annotation.*;
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.PathVariable;
|
||||||
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
|
import org.springframework.web.bind.annotation.RequestParam;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
|
||||||
import com.olympus.dto.IngestionOutput;
|
import com.olympus.apollo.feign.services.ParserModuleService;
|
||||||
|
import com.olympus.apollo.feign.services.REModuleService;
|
||||||
import com.olympus.apollo.services.GitRepositoryIngestor;
|
import com.olympus.apollo.services.GitRepositoryIngestor;
|
||||||
|
import com.olympus.apollo.services.GitService;
|
||||||
import com.olympus.apollo.services.KSIngestor;
|
import com.olympus.apollo.services.KSIngestor;
|
||||||
import org.springframework.ai.document.Document;
|
import com.olympus.dto.ApolloParseRequestDTO;
|
||||||
|
import com.olympus.dto.CommonParseRequest;
|
||||||
|
import com.olympus.dto.IngestionOutput;
|
||||||
|
import com.olympus.dto.ResultDTO;
|
||||||
|
import com.olympus.feign.JavaParserModule;
|
||||||
|
|
||||||
|
|
||||||
@RestController
|
@RestController
|
||||||
@@ -52,7 +57,7 @@ public class TestController {
|
|||||||
|
|
||||||
@GetMapping("test/ingest_document/{id}")
|
@GetMapping("test/ingest_document/{id}")
|
||||||
public IngestionOutput ingestDocumentById(@PathVariable String id) {
|
public IngestionOutput ingestDocumentById(@PathVariable String id) {
|
||||||
return ksIngestor.ingestDocumentById(id);
|
return ksIngestor.ingestDocumentByIdAsync(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@GetMapping("test/query_vector")
|
@GetMapping("test/query_vector")
|
||||||
|
|||||||
@@ -1,29 +1,30 @@
|
|||||||
package com.olympus.apollo.services;
|
package com.olympus.apollo.services;
|
||||||
|
|
||||||
import com.olympus.dto.DeleteGitRepoDetailsRequest;
|
import java.util.Date;
|
||||||
import com.olympus.dto.ResultDTO;
|
import java.util.List;
|
||||||
import com.olympus.apollo.exception.vectorStoreMetaDetailsEmptyException;
|
import java.util.Optional;
|
||||||
import com.olympus.apollo.repository.*;
|
import java.util.concurrent.CompletableFuture;
|
||||||
import com.olympus.model.apollo.KSGitInfo;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import java.util.Date;
|
import org.springframework.ai.vectorstore.VectorStore;
|
||||||
|
|
||||||
import com.olympus.dto.DeletionRequest;
|
|
||||||
import org.springframework.ai.document.Document;
|
|
||||||
import org.springframework.ai.vectorstore.SearchRequest;
|
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.messaging.simp.SimpMessagingTemplate;
|
import org.springframework.messaging.simp.SimpMessagingTemplate;
|
||||||
import org.springframework.scheduling.annotation.Async;
|
import org.springframework.scheduling.annotation.Async;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.ai.vectorstore.VectorStore;
|
|
||||||
|
import com.olympus.apollo.exception.vectorStoreMetaDetailsEmptyException;
|
||||||
|
import com.olympus.apollo.repository.KSDocumentRepository;
|
||||||
|
import com.olympus.apollo.repository.KSGitInfoRepository;
|
||||||
|
import com.olympus.apollo.repository.KSGitIngestionInfoRepository;
|
||||||
|
import com.olympus.apollo.repository.KSIngestionInfoRepository;
|
||||||
|
import com.olympus.apollo.repository.KSTextsRepository;
|
||||||
|
import com.olympus.apollo.repository.VectorStoreRepository;
|
||||||
|
import com.olympus.dto.DeleteGitRepoDetailsRequest;
|
||||||
|
import com.olympus.dto.DeletionRequest;
|
||||||
|
import com.olympus.dto.ResultDTO;
|
||||||
import com.olympus.model.apollo.KSDocument;
|
import com.olympus.model.apollo.KSDocument;
|
||||||
|
import com.olympus.model.apollo.KSGitInfo;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.concurrent.CompletableFuture;
|
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
public class DeletionService {
|
public class DeletionService {
|
||||||
@@ -58,21 +59,6 @@ public class DeletionService {
|
|||||||
public void deleteRecords(DeletionRequest deletionRequest) {
|
public void deleteRecords(DeletionRequest deletionRequest) {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
//TODO: COMPLETE REFACTOR REQUIRED TO DELETE RECORD FROM AZURE SEARCH
|
|
||||||
// NOT WORKING AT THE MOMENT
|
|
||||||
// boolean KSDocumentExists = deletionRequest.getKsDocumentId() != null &&
|
|
||||||
// !deletionRequest.getKsDocumentId().isEmpty() &&
|
|
||||||
// ksDocumentRepository.existsById(deletionRequest.getKsDocumentId());
|
|
||||||
// if(KSDocumentExists){
|
|
||||||
// SearchRequest searchRequest = SearchRequest.defaults()
|
|
||||||
// .withQuery("a").withTopK(1000)
|
|
||||||
// .withSimilarityThreshold(0.0)
|
|
||||||
// .withFilterExpression("KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'");
|
|
||||||
|
|
||||||
|
|
||||||
// List<Document> docs = vectorStore.similaritySearch(searchRequest);
|
|
||||||
// List<String> ids = docs.stream().map(Document::getId).toList();
|
|
||||||
// vectorStore.delete(ids);
|
|
||||||
String rag_filter = "KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'";
|
String rag_filter = "KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'";
|
||||||
logger.info("Starting deletion");
|
logger.info("Starting deletion");
|
||||||
vectorStore.delete(rag_filter);
|
vectorStore.delete(rag_filter);
|
||||||
@@ -92,21 +78,7 @@ public class DeletionService {
|
|||||||
public void deleteRecordsOnlyFromVectorStore(DeletionRequest deletionRequest) {
|
public void deleteRecordsOnlyFromVectorStore(DeletionRequest deletionRequest) {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
//TODO: COMPLETE REFACTOR REQUIRED TO DELETE RECORD FROM AZURE SEARCH
|
|
||||||
// NOT WORKING AT THE MOMENT
|
|
||||||
// boolean KSDocumentExists = deletionRequest.getKsDocumentId() != null &&
|
|
||||||
// !deletionRequest.getKsDocumentId().isEmpty() &&
|
|
||||||
// ksDocumentRepository.existsById(deletionRequest.getKsDocumentId());
|
|
||||||
// if(KSDocumentExists){
|
|
||||||
// SearchRequest searchRequest = SearchRequest.defaults()
|
|
||||||
// .withQuery("a").withTopK(1000)
|
|
||||||
// .withSimilarityThreshold(0.0)
|
|
||||||
// .withFilterExpression("KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'");
|
|
||||||
|
|
||||||
|
|
||||||
// List<Document> docs = vectorStore.similaritySearch(searchRequest);
|
|
||||||
// List<String> ids = docs.stream().map(Document::getId).toList();
|
|
||||||
// vectorStore.delete(ids);
|
|
||||||
String rag_filter = "KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'";
|
String rag_filter = "KsDocumentId=='"+deletionRequest.getKsDocumentId()+"'";
|
||||||
logger.info("Starting deletion");
|
logger.info("Starting deletion");
|
||||||
vectorStore.delete(rag_filter);
|
vectorStore.delete(rag_filter);
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import java.nio.file.Path;
|
|||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.tomcat.util.openssl.openssl_h;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
@@ -18,7 +17,6 @@ import org.springframework.security.core.context.SecurityContextHolder;
|
|||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.olympus.apollo.repository.KSDocumentRepository;
|
import com.olympus.apollo.repository.KSDocumentRepository;
|
||||||
import com.olympus.apollo.repository.ProjectRepository;
|
|
||||||
import com.olympus.apollo.security.entity.User;
|
import com.olympus.apollo.security.entity.User;
|
||||||
import com.olympus.model.apollo.KSDocument;
|
import com.olympus.model.apollo.KSDocument;
|
||||||
|
|
||||||
@@ -31,7 +29,6 @@ public class KSDocumentService {
|
|||||||
private KSDocumentRepository ksdocRepo;
|
private KSDocumentRepository ksdocRepo;
|
||||||
|
|
||||||
public List<KSDocument> findByProjectNameAndApplicationName() {
|
public List<KSDocument> findByProjectNameAndApplicationName() {
|
||||||
logger.info("findByProjectNameAndApplicationName function:");
|
|
||||||
User principal = (User) SecurityContextHolder.getContext().getAuthentication().getPrincipal();
|
User principal = (User) SecurityContextHolder.getContext().getAuthentication().getPrincipal();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -1,28 +1,33 @@
|
|||||||
package com.olympus.apollo.services;
|
package com.olympus.apollo.services;
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
|
||||||
import com.olympus.dto.IngestionOutput;
|
|
||||||
import com.olympus.model.apollo.KSDocument;
|
|
||||||
import com.olympus.model.apollo.KSTexts;
|
|
||||||
import com.olympus.apollo.repository.KSTextsRepository;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.ai.document.Document;
|
import org.springframework.ai.document.Document;
|
||||||
import org.springframework.ai.vectorstore.SearchRequest.Builder;
|
|
||||||
import org.springframework.ai.reader.tika.TikaDocumentReader;
|
import org.springframework.ai.reader.tika.TikaDocumentReader;
|
||||||
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
||||||
import org.springframework.ai.vectorstore.SearchRequest;
|
import org.springframework.ai.vectorstore.SearchRequest;
|
||||||
|
import org.springframework.ai.vectorstore.SearchRequest.Builder;
|
||||||
import org.springframework.ai.vectorstore.VectorStore;
|
import org.springframework.ai.vectorstore.VectorStore;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.core.io.Resource;
|
import org.springframework.core.io.Resource;
|
||||||
|
import org.springframework.scheduling.annotation.Async;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.olympus.model.apollo.KSIngestionInfo;
|
|
||||||
import com.olympus.apollo.repository.KSDocumentRepository;
|
import com.olympus.apollo.repository.KSDocumentRepository;
|
||||||
import com.olympus.apollo.repository.KSIngestionInfoRepository;
|
import com.olympus.apollo.repository.KSTextsRepository;
|
||||||
|
import com.olympus.dto.IngestionOutput;
|
||||||
|
import com.olympus.model.apollo.KSDocument;
|
||||||
|
import com.olympus.model.apollo.KSIngestionInfo;
|
||||||
|
import com.olympus.model.apollo.KSTexts;
|
||||||
|
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@@ -93,39 +98,83 @@ public class KSIngestor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public IngestionOutput ingestDocumentByIdAsync(String id) {
|
||||||
|
IngestionOutput ingestionOutput= new IngestionOutput();
|
||||||
|
Optional<KSDocument> optionalDocument = ksDocumentRepository.findById(id);
|
||||||
|
if (optionalDocument.isPresent()) {
|
||||||
|
KSDocument ksDocument = optionalDocument.get();
|
||||||
|
if ("LOADED".equals(ksDocument.getIngestionStatus()) || "ERROR".equals(ksDocument.getIngestionStatus())) {
|
||||||
|
ingestionOutput.setStatus("IN PROGRESS");
|
||||||
|
ingestDocumentAsync(ksDocument);
|
||||||
|
return ingestionOutput;
|
||||||
|
} else {
|
||||||
|
ingestionOutput.setMessage("OOPS: Document is already Injected");
|
||||||
|
return ingestionOutput;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ingestionOutput.setMessage("OOPS: Document Not found");
|
||||||
|
return ingestionOutput;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Async
|
||||||
|
private CompletableFuture<Void> ingestDocumentAsync(KSDocument ksDocument) {
|
||||||
|
ingestDocument(ksDocument);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
private IngestionOutput ingestDocument(KSDocument ksDocument) {
|
private IngestionOutput ingestDocument(KSDocument ksDocument) {
|
||||||
IngestionOutput ingestionLoopOutput = new IngestionOutput();
|
IngestionOutput ingestionLoopOutput = new IngestionOutput();
|
||||||
try {
|
try {
|
||||||
ksDocument.setIngestionStatus("IN PROGRESS");
|
ksDocument.setIngestionStatus("IN PROGRESS");
|
||||||
ksDocumentRepository.save(ksDocument);
|
ksDocumentRepository.save(ksDocument);
|
||||||
|
|
||||||
Resource file = storageService.loadAsResource(ksDocument.getFilePath());
|
|
||||||
TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(file);
|
|
||||||
|
|
||||||
List<Document> docs = tikaDocumentReader.read();
|
|
||||||
logger.info("Ingested document: " + ksDocument.getFilePath());
|
|
||||||
logger.info("Number of documents: " + docs.size());
|
|
||||||
|
|
||||||
KSIngestionInfo ingestionInfo = ksDocument.getIngestionInfo();
|
KSIngestionInfo ingestionInfo = ksDocument.getIngestionInfo();
|
||||||
|
List<Document> docs = null;
|
||||||
|
try {
|
||||||
|
ksDocument.setIngestionMessage("Reading document: " + ksDocument.getFilePath());
|
||||||
|
ksDocumentRepository.save(ksDocument);
|
||||||
|
|
||||||
|
Resource file = storageService.loadAsResource(ksDocument.getFilePath());
|
||||||
|
TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(file);
|
||||||
|
|
||||||
|
docs = tikaDocumentReader.read();
|
||||||
|
|
||||||
|
logger.info("Ingested document: " + ksDocument.getFilePath());
|
||||||
|
logger.info("Number of documents: " + docs.size());
|
||||||
|
ksDocument.setIngestionMessage("Document read successfully");
|
||||||
|
ksDocumentRepository.save(ksDocument);
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Error reading document: " + e.getMessage());
|
||||||
|
ksDocument.setIngestionStatus("ERROR");
|
||||||
|
ksDocument.setIngestionMessage("Error reading document: " + e.getMessage());
|
||||||
|
ksDocumentRepository.save(ksDocument);
|
||||||
|
ingestionLoopOutput.setStatus("ERROR");
|
||||||
|
ingestionLoopOutput.setMessage("Error reading document: " + e.getMessage());
|
||||||
|
return ingestionLoopOutput;
|
||||||
|
}
|
||||||
|
|
||||||
TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(),
|
TokenTextSplitter splitter = new TokenTextSplitter(ingestionInfo.getDefaultChunkSize(),
|
||||||
ingestionInfo.getMinChunkSize(),
|
ingestionInfo.getMinChunkSize(),
|
||||||
ingestionInfo.getMinChunkSizeToEmbed(),
|
ingestionInfo.getMinChunkSizeToEmbed(),
|
||||||
ingestionInfo.getMaxNumberOfChunks(),
|
ingestionInfo.getMaxNumberOfChunks(),
|
||||||
true);
|
true);
|
||||||
|
|
||||||
HashMap<String, String> metadata = ingestionInfo.getMetadata();
|
HashMap<String, String> metadata = ingestionInfo.getMetadata();
|
||||||
metadata.put("KsDocumentId",ksDocument.getId());
|
metadata.put("KsDocumentId",ksDocument.getId());
|
||||||
|
|
||||||
docs.forEach(doc -> {
|
docs.forEach(doc -> {
|
||||||
List<Document> splitDocs = splitter.split(doc);
|
List<Document> splitDocs = splitter.split(doc);
|
||||||
|
|
||||||
logger.info("Number of documents: " + splitDocs.size());
|
|
||||||
for (Document splitDoc : splitDocs) {
|
for (Document splitDoc : splitDocs) {
|
||||||
splitDoc.getMetadata().putAll(metadata);
|
splitDoc.getMetadata().putAll(metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ksDocument.setIngestionMessage("Embedding documents");
|
||||||
|
ksDocumentRepository.save(ksDocument);
|
||||||
embedDocuments(splitDocs, ingestionInfo);
|
embedDocuments(splitDocs, ingestionInfo);
|
||||||
});
|
});
|
||||||
|
|
||||||
ksDocument.setIngestionStatus("INGESTED");
|
ksDocument.setIngestionStatus("INGESTED");
|
||||||
ksDocument.setIngestionDate(new Date());
|
ksDocument.setIngestionDate(new Date());
|
||||||
ksDocument.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date()));
|
ksDocument.setIngestionDateFormat(new SimpleDateFormat("MM/dd/yy").format(new Date()));
|
||||||
@@ -135,12 +184,17 @@ public class KSIngestor {
|
|||||||
ingestionLoopOutput.setStatus("OK");
|
ingestionLoopOutput.setStatus("OK");
|
||||||
ingestionLoopOutput.setMessage("OK");
|
ingestionLoopOutput.setMessage("OK");
|
||||||
}catch (Exception e){
|
}catch (Exception e){
|
||||||
|
ksDocument.setIngestionStatus("ERROR");
|
||||||
|
ksDocument.setIngestionMessage("Error ingesting document: " + e.getMessage());
|
||||||
|
ksDocumentRepository.save(ksDocument);
|
||||||
|
|
||||||
ingestionLoopOutput.setStatus("ERROR");
|
ingestionLoopOutput.setStatus("ERROR");
|
||||||
ingestionLoopOutput.setMessage(e.getMessage());
|
ingestionLoopOutput.setMessage(e.getMessage());
|
||||||
}
|
}
|
||||||
return ingestionLoopOutput;
|
return ingestionLoopOutput;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public IngestionOutput ingestTextById(String id,String textToBeEmbed,String KsExternalDocUniqueID) {
|
public IngestionOutput ingestTextById(String id,String textToBeEmbed,String KsExternalDocUniqueID) {
|
||||||
IngestionOutput ingestionOutput= new IngestionOutput();
|
IngestionOutput ingestionOutput= new IngestionOutput();
|
||||||
Optional<KSTexts> optionalDocument = ksTextsRepository.findById(id);
|
Optional<KSTexts> optionalDocument = ksTextsRepository.findById(id);
|
||||||
@@ -218,9 +272,6 @@ public class KSIngestor {
|
|||||||
private void embedDocuments(List<Document> docs, KSIngestionInfo ingestionInfo) {
|
private void embedDocuments(List<Document> docs, KSIngestionInfo ingestionInfo) {
|
||||||
|
|
||||||
logger.info("Embedding documents");
|
logger.info("Embedding documents");
|
||||||
|
|
||||||
docs.forEach(doc -> logger.info("Document metadata: " + doc.getMetadata()));
|
|
||||||
|
|
||||||
int batchSize = embDocsBatchSize;
|
int batchSize = embDocsBatchSize;
|
||||||
for (int i = 0; i < docs.size(); i += batchSize) {
|
for (int i = 0; i < docs.size(); i += batchSize) {
|
||||||
int end = Math.min(i + batchSize, docs.size());
|
int end = Math.min(i + batchSize, docs.size());
|
||||||
@@ -228,7 +279,7 @@ public class KSIngestor {
|
|||||||
try {
|
try {
|
||||||
Thread.sleep(embDocRetryTime);
|
Thread.sleep(embDocRetryTime);
|
||||||
vectorStore.add(currentList);
|
vectorStore.add(currentList);
|
||||||
logger.info("Documents embedded - Progress: Batch from {} to {} completed", i, end);
|
logger.info("Documents embedded - Progress: Batch from {} to {} completed of {} total chunks", i, end, docs.size());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Error embedding documents from {} to {}: {}", i, end, e.getMessage());
|
logger.error("Error embedding documents from {} to {}: {}", i, end, e.getMessage());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -66,4 +66,6 @@ logging:
|
|||||||
#org.springframework.web.client: DEBUG
|
#org.springframework.web.client: DEBUG
|
||||||
|
|
||||||
java-re-module:
|
java-re-module:
|
||||||
url: "http://localhost:8084"
|
url: "http://localhost:8084"
|
||||||
|
|
||||||
|
tika.config: "tika-config.xml"
|
||||||
|
|||||||
4
tika-config.xml
Normal file
4
tika-config.xml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<properties>
|
||||||
|
|
||||||
|
</properties>
|
||||||
Reference in New Issue
Block a user