From be31b1e83c96b25c7fab2e01be1a57091e6ac1d9 Mon Sep 17 00:00:00 2001 From: Emanuele Ferrelli Date: Tue, 8 Jul 2025 10:01:51 +0200 Subject: [PATCH] Delete try/catch block in step --- .../stepSolvers/EmbeddingDocTempSolver.java | 109 +++++++++--------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/src/main/java/com/olympus/hermione/stepSolvers/EmbeddingDocTempSolver.java b/src/main/java/com/olympus/hermione/stepSolvers/EmbeddingDocTempSolver.java index c2ef3da..b508e98 100644 --- a/src/main/java/com/olympus/hermione/stepSolvers/EmbeddingDocTempSolver.java +++ b/src/main/java/com/olympus/hermione/stepSolvers/EmbeddingDocTempSolver.java @@ -4,15 +4,17 @@ import ch.qos.logback.classic.Logger; import com.olympus.hermione.models.ScenarioExecution; import com.olympus.hermione.utility.AttributeParser; import java.io.File; +import java.io.IOException; import java.util.Collections; import java.util.List; import org.apache.tika.Tika; +import org.apache.tika.exception.TikaException; import org.slf4j.LoggerFactory; import org.springframework.ai.document.Document; import org.springframework.ai.transformer.splitter.TokenTextSplitter; public class EmbeddingDocTempSolver extends StepSolver { - + private String scenario_execution_id; private String path_file; private int default_chunk_size; @@ -20,11 +22,9 @@ public class EmbeddingDocTempSolver extends StepSolver { private int min_chunk_length_to_embed; private int max_num_chunks; - - Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class); - private void loadParameters(){ + private void loadParameters() { logger.info("Loading parameters"); this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId()); logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId()); @@ -36,77 +36,83 @@ public class EmbeddingDocTempSolver extends StepSolver { this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file")); logger.info("Parsed path_file: {}", this.path_file); - if(this.step.getAttributes().containsKey("default_chunk_size")){ + if (this.step.getAttributes().containsKey("default_chunk_size")) { this.default_chunk_size = (int) this.step.getAttributes().get("default_chunk_size"); logger.info("Parsed default_chunk_size from attributes: {}", this.default_chunk_size); - }else{ + } else { this.default_chunk_size = 8000; logger.info("default_chunk_size not found in attributes, using default: 8000"); } - if(this.step.getAttributes().containsKey("min_chunk_size")){ + if (this.step.getAttributes().containsKey("min_chunk_size")) { this.min_chunk_size = (int) this.step.getAttributes().get("min_chunk_size"); - }else{ + } else { this.min_chunk_size = 50; logger.info("min_chunk_size not found in attributes, using default: 50"); } - if(this.step.getAttributes().containsKey("min_chunk_length_to_embed")){ + if (this.step.getAttributes().containsKey("min_chunk_length_to_embed")) { this.min_chunk_length_to_embed = (int) this.step.getAttributes().get("min_chunk_length_to_embed"); - }else{ + } else { this.min_chunk_length_to_embed = 50; logger.info("min_chunk_length_to_embed not found in attributes, using default: 50"); } - if(this.step.getAttributes().containsKey("max_num_chunks")){ + if (this.step.getAttributes().containsKey("max_num_chunks")) { this.max_num_chunks = (int) this.step.getAttributes().get("max_num_chunks"); - }else{ + } else { this.max_num_chunks = 1000; logger.info("max_num_chunks not found in attributes, using default: 1000"); } } @Override - public ScenarioExecution solveStep(){ + public ScenarioExecution solveStep() { - try{ - logger.info("Solving step: " + this.step.getName()); - this.scenarioExecution.setCurrentStepId(this.step.getStepId()); - logger.info("Loading parameters for step: {}", this.step.getName()); - loadParameters(); - logger.info("Embedding documents"); - File file = new File(this.path_file); - logger.info("Reading file from path: {}", this.path_file); - Tika tika = new Tika(); - tika.setMaxStringLength(-1); - String text = tika.parseToString(file); - logger.info("File read successfully. Length: {} characters", text.length()); - Document myDoc = new Document(text); + logger.info("Solving step: " + this.step.getName()); + this.scenarioExecution.setCurrentStepId(this.step.getStepId()); + logger.info("Loading parameters for step: {}", this.step.getName()); + loadParameters(); + logger.info("Embedding documents"); + File file = new File(this.path_file); + logger.info("Reading file from path: {}", this.path_file); + Tika tika = new Tika(); + tika.setMaxStringLength(-1); + String text; + try { + text = tika.parseToString(file); + } catch (IOException | TikaException e) { + logger.error("Error parsing file: ", e); + throw new RuntimeException("Error parsing file", e); + } + logger.info("File read successfully. Length: {} characters", text.length()); + Document myDoc = new Document(text); - List docs = Collections.singletonList(myDoc); + List docs = Collections.singletonList(myDoc); - logger.info("Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}", + logger.info( + "Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}", this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks); - TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size, - this.min_chunk_size, - this.min_chunk_length_to_embed, - this.max_num_chunks, - true); + TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size, + this.min_chunk_size, + this.min_chunk_length_to_embed, + this.max_num_chunks, + true); - logger.info("Splitting and embedding documents"); - docs.forEach(doc -> { - List splitDocs = splitter.split(doc); - Integer docIndex = 0; - logger.info("Number of split documents: {}", splitDocs.size()); + logger.info("Splitting and embedding documents"); + docs.forEach(doc -> { + List splitDocs = splitter.split(doc); + Integer docIndex = 0; + logger.info("Number of split documents: {}", splitDocs.size()); - for (Document splitDoc : splitDocs) { - splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id); - splitDoc.getMetadata().put("KsDocumentIndex",docIndex.toString()); - splitDoc.getMetadata().put("KsDoctype", "temp"); - logger.info("Adding split document with index {} to vector store", docIndex); - docIndex++; - } - logger.info("Adding {} split documents to vector store", splitDocs.size()); + for (Document splitDoc : splitDocs) { + splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id); + splitDoc.getMetadata().put("KsDocumentIndex", docIndex.toString()); + splitDoc.getMetadata().put("KsDoctype", "temp"); + logger.info("Adding split document with index {} to vector store", docIndex); + docIndex++; + } + logger.info("Adding {} split documents to vector store", splitDocs.size()); // Carica un massimo di 10 documenti per volta int batchSize = 10; @@ -116,14 +122,9 @@ public class EmbeddingDocTempSolver extends StepSolver { vectorStore.add(batch); logger.info("Added batch of {} documents to vector store (from {} to {})", batch.size(), i, end - 1); } - //vectorStore.add(splitDocs); - }); - logger.info("All documents embedded and added to vector store successfully"); - - }catch (Exception e){ - logger.error("Error while solvingStep: "+e.getMessage(), e); - e.printStackTrace(); - } + // vectorStore.add(splitDocs); + }); + logger.info("All documents embedded and added to vector store successfully"); logger.info("Setting next step id: {}", this.step.getNextStepId()); this.scenarioExecution.setNextStepId(this.step.getNextStepId());