From 485c781e4c408c201d3cd7c73844b1f4e0b15edd Mon Sep 17 00:00:00 2001 From: Emanuele Ferrelli Date: Wed, 28 May 2025 14:41:31 +0200 Subject: [PATCH] Update file system directory --- .../hermione/services/FileService.java | 2 -- .../stepSolvers/EmbeddingDocTempSolver.java | 30 ++++++++++++++----- src/main/resources/application.properties | 6 +--- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/main/java/com/olympus/hermione/services/FileService.java b/src/main/java/com/olympus/hermione/services/FileService.java index 0e8c7c3..411697d 100644 --- a/src/main/java/com/olympus/hermione/services/FileService.java +++ b/src/main/java/com/olympus/hermione/services/FileService.java @@ -28,8 +28,6 @@ import com.olympus.model.apollo.KSIngestionInfo; @Service public class FileService { - private static final String UPLOAD_DIR = "C:\\mnt\\hermione_storage\\documents\\file_input_scenarios\\"; - private Logger logger = LoggerFactory.getLogger(FileService.class); @Value("${file.upload-dir}") diff --git a/src/main/java/com/olympus/hermione/stepSolvers/EmbeddingDocTempSolver.java b/src/main/java/com/olympus/hermione/stepSolvers/EmbeddingDocTempSolver.java index 994e8f7..897ba1e 100644 --- a/src/main/java/com/olympus/hermione/stepSolvers/EmbeddingDocTempSolver.java +++ b/src/main/java/com/olympus/hermione/stepSolvers/EmbeddingDocTempSolver.java @@ -22,43 +22,48 @@ public class EmbeddingDocTempSolver extends StepSolver { - Logger logger = (Logger) LoggerFactory.getLogger(BasicQueryRagSolver.class); + Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class); private void loadParameters(){ logger.info("Loading parameters"); this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId()); - logger.info("Scenario Execution ID: "+this.scenarioExecution.getId()); + logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId()); AttributeParser attributeParser = new AttributeParser(this.scenarioExecution); this.scenario_execution_id = attributeParser.parse((String) this.scenarioExecution.getId()); this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file")); + logger.info("Parsed path_file: {}", this.path_file); if(this.step.getAttributes().containsKey("default_chunk_size")){ this.default_chunk_size = (int) this.step.getAttributes().get("default_chunk_size"); + logger.info("Parsed default_chunk_size from attributes: {}", this.default_chunk_size); }else{ this.default_chunk_size = 8000; + logger.info("default_chunk_size not found in attributes, using default: 8000"); } if(this.step.getAttributes().containsKey("min_chunk_size")){ this.min_chunk_size = (int) this.step.getAttributes().get("min_chunk_size"); }else{ this.min_chunk_size = 50; + logger.info("min_chunk_size not found in attributes, using default: 50"); } if(this.step.getAttributes().containsKey("min_chunk_length_to_embed")){ this.min_chunk_length_to_embed = (int) this.step.getAttributes().get("min_chunk_length_to_embed"); }else{ this.min_chunk_length_to_embed = 50; + logger.info("min_chunk_length_to_embed not found in attributes, using default: 50"); } if(this.step.getAttributes().containsKey("max_num_chunks")){ this.max_num_chunks = (int) this.step.getAttributes().get("max_num_chunks"); }else{ this.max_num_chunks = 1000; + logger.info("max_num_chunks not found in attributes, using default: 1000"); } - } @Override @@ -67,45 +72,54 @@ public class EmbeddingDocTempSolver extends StepSolver { try{ logger.info("Solving step: " + this.step.getName()); this.scenarioExecution.setCurrentStepId(this.step.getStepId()); + logger.info("Loading parameters for step: {}", this.step.getName()); loadParameters(); - logger.info("Embedding documents"); File file = new File(this.path_file); + logger.info("Reading file from path: {}", this.path_file); Tika tika = new Tika(); tika.setMaxStringLength(-1); String text = tika.parseToString(file); + logger.info("File read successfully. Length: {} characters", text.length()); Document myDoc = new Document(text); List docs = Collections.singletonList(myDoc); + logger.info("Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}", + this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks); TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks, true); - + logger.info("Splitting and embedding documents"); docs.forEach(doc -> { List splitDocs = splitter.split(doc); Integer docIndex = 0; - logger.info("Number of documents: " + splitDocs.size()); + logger.info("Number of split documents: {}", splitDocs.size()); for (Document splitDoc : splitDocs) { splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id); splitDoc.getMetadata().put("KsDocumentIndex",docIndex.toString()); - logger.info("DOC INDEX: ", docIndex); + splitDoc.getMetadata().put("KsDoctype", "temp"); + logger.info("Adding split document with index {} to vector store", docIndex); docIndex++; } + logger.info("Adding {} split documents to vector store", splitDocs.size()); vectorStore.add(splitDocs); }); + logger.info("All documents embedded and added to vector store successfully"); }catch (Exception e){ - logger.error("Error while solvingStep: "+e.getMessage()); + logger.error("Error while solvingStep: "+e.getMessage(), e); e.printStackTrace(); } + logger.info("Setting next step id: {}", this.step.getNextStepId()); this.scenarioExecution.setNextStepId(this.step.getNextStepId()); + logger.info("Returning scenario execution for step: {}", this.step.getName()); return this.scenarioExecution; } diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 152456c..0303374 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -56,10 +56,6 @@ eureka.instance.preferIpAddress: true hermione.fe.url = http://localhost:5173 -java-parser-module.url: http://java-parser-module-service.olympus.svc.cluster.local:8080 -java-re-module.url: http://java-re-module-service.olympus.svc.cluster.local:8080 -jsp-parser-module.url: http://jsp-parser-module-service.olympus.svc.cluster.local:8080 - spring.ai.vectorstore.chroma.client.host=http://108.142.74.161 spring.ai.vectorstore.chroma.client.port=8000 spring.ai.vectorstore.chroma.client.key-token=tKAJfN1Yv5lP7pKorJHGfHMQhNEcM9uu @@ -70,7 +66,7 @@ spring.servlet.multipart.max-file-size=10MB spring.servlet.multipart.max-request-size=10MB file.upload-dir=/mnt/hermione_storage/documents/file_input_scenarios/ - +# file.upload-dir=C:\\mnt\\hermione_storage\\documents\\file_input_scenarios\\ generic-file-parser-module.url=http://generic-file-parser-module-service.olympus.svc.cluster.local:8080