Delete try/catch block in step

This commit is contained in:
2025-07-08 10:01:51 +02:00
parent 170c5515eb
commit be31b1e83c

View File

@@ -4,15 +4,17 @@ import ch.qos.logback.classic.Logger;
import com.olympus.hermione.models.ScenarioExecution; import com.olympus.hermione.models.ScenarioExecution;
import com.olympus.hermione.utility.AttributeParser; import com.olympus.hermione.utility.AttributeParser;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.tika.Tika; import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document; import org.springframework.ai.document.Document;
import org.springframework.ai.transformer.splitter.TokenTextSplitter; import org.springframework.ai.transformer.splitter.TokenTextSplitter;
public class EmbeddingDocTempSolver extends StepSolver { public class EmbeddingDocTempSolver extends StepSolver {
private String scenario_execution_id; private String scenario_execution_id;
private String path_file; private String path_file;
private int default_chunk_size; private int default_chunk_size;
@@ -20,11 +22,9 @@ public class EmbeddingDocTempSolver extends StepSolver {
private int min_chunk_length_to_embed; private int min_chunk_length_to_embed;
private int max_num_chunks; private int max_num_chunks;
Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class); Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class);
private void loadParameters(){ private void loadParameters() {
logger.info("Loading parameters"); logger.info("Loading parameters");
this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId()); this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());
logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId()); logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId());
@@ -36,77 +36,83 @@ public class EmbeddingDocTempSolver extends StepSolver {
this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file")); this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file"));
logger.info("Parsed path_file: {}", this.path_file); logger.info("Parsed path_file: {}", this.path_file);
if(this.step.getAttributes().containsKey("default_chunk_size")){ if (this.step.getAttributes().containsKey("default_chunk_size")) {
this.default_chunk_size = (int) this.step.getAttributes().get("default_chunk_size"); this.default_chunk_size = (int) this.step.getAttributes().get("default_chunk_size");
logger.info("Parsed default_chunk_size from attributes: {}", this.default_chunk_size); logger.info("Parsed default_chunk_size from attributes: {}", this.default_chunk_size);
}else{ } else {
this.default_chunk_size = 8000; this.default_chunk_size = 8000;
logger.info("default_chunk_size not found in attributes, using default: 8000"); logger.info("default_chunk_size not found in attributes, using default: 8000");
} }
if(this.step.getAttributes().containsKey("min_chunk_size")){ if (this.step.getAttributes().containsKey("min_chunk_size")) {
this.min_chunk_size = (int) this.step.getAttributes().get("min_chunk_size"); this.min_chunk_size = (int) this.step.getAttributes().get("min_chunk_size");
}else{ } else {
this.min_chunk_size = 50; this.min_chunk_size = 50;
logger.info("min_chunk_size not found in attributes, using default: 50"); logger.info("min_chunk_size not found in attributes, using default: 50");
} }
if(this.step.getAttributes().containsKey("min_chunk_length_to_embed")){ if (this.step.getAttributes().containsKey("min_chunk_length_to_embed")) {
this.min_chunk_length_to_embed = (int) this.step.getAttributes().get("min_chunk_length_to_embed"); this.min_chunk_length_to_embed = (int) this.step.getAttributes().get("min_chunk_length_to_embed");
}else{ } else {
this.min_chunk_length_to_embed = 50; this.min_chunk_length_to_embed = 50;
logger.info("min_chunk_length_to_embed not found in attributes, using default: 50"); logger.info("min_chunk_length_to_embed not found in attributes, using default: 50");
} }
if(this.step.getAttributes().containsKey("max_num_chunks")){ if (this.step.getAttributes().containsKey("max_num_chunks")) {
this.max_num_chunks = (int) this.step.getAttributes().get("max_num_chunks"); this.max_num_chunks = (int) this.step.getAttributes().get("max_num_chunks");
}else{ } else {
this.max_num_chunks = 1000; this.max_num_chunks = 1000;
logger.info("max_num_chunks not found in attributes, using default: 1000"); logger.info("max_num_chunks not found in attributes, using default: 1000");
} }
} }
@Override @Override
public ScenarioExecution solveStep(){ public ScenarioExecution solveStep() {
try{ logger.info("Solving step: " + this.step.getName());
logger.info("Solving step: " + this.step.getName()); this.scenarioExecution.setCurrentStepId(this.step.getStepId());
this.scenarioExecution.setCurrentStepId(this.step.getStepId()); logger.info("Loading parameters for step: {}", this.step.getName());
logger.info("Loading parameters for step: {}", this.step.getName()); loadParameters();
loadParameters(); logger.info("Embedding documents");
logger.info("Embedding documents"); File file = new File(this.path_file);
File file = new File(this.path_file); logger.info("Reading file from path: {}", this.path_file);
logger.info("Reading file from path: {}", this.path_file); Tika tika = new Tika();
Tika tika = new Tika(); tika.setMaxStringLength(-1);
tika.setMaxStringLength(-1); String text;
String text = tika.parseToString(file); try {
logger.info("File read successfully. Length: {} characters", text.length()); text = tika.parseToString(file);
Document myDoc = new Document(text); } catch (IOException | TikaException e) {
logger.error("Error parsing file: ", e);
throw new RuntimeException("Error parsing file", e);
}
logger.info("File read successfully. Length: {} characters", text.length());
Document myDoc = new Document(text);
List<Document> docs = Collections.singletonList(myDoc); List<Document> docs = Collections.singletonList(myDoc);
logger.info("Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}", logger.info(
"Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}",
this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks); this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks);
TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size, TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size,
this.min_chunk_size, this.min_chunk_size,
this.min_chunk_length_to_embed, this.min_chunk_length_to_embed,
this.max_num_chunks, this.max_num_chunks,
true); true);
logger.info("Splitting and embedding documents"); logger.info("Splitting and embedding documents");
docs.forEach(doc -> { docs.forEach(doc -> {
List<Document> splitDocs = splitter.split(doc); List<Document> splitDocs = splitter.split(doc);
Integer docIndex = 0; Integer docIndex = 0;
logger.info("Number of split documents: {}", splitDocs.size()); logger.info("Number of split documents: {}", splitDocs.size());
for (Document splitDoc : splitDocs) { for (Document splitDoc : splitDocs) {
splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id); splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id);
splitDoc.getMetadata().put("KsDocumentIndex",docIndex.toString()); splitDoc.getMetadata().put("KsDocumentIndex", docIndex.toString());
splitDoc.getMetadata().put("KsDoctype", "temp"); splitDoc.getMetadata().put("KsDoctype", "temp");
logger.info("Adding split document with index {} to vector store", docIndex); logger.info("Adding split document with index {} to vector store", docIndex);
docIndex++; docIndex++;
} }
logger.info("Adding {} split documents to vector store", splitDocs.size()); logger.info("Adding {} split documents to vector store", splitDocs.size());
// Carica un massimo di 10 documenti per volta // Carica un massimo di 10 documenti per volta
int batchSize = 10; int batchSize = 10;
@@ -116,14 +122,9 @@ public class EmbeddingDocTempSolver extends StepSolver {
vectorStore.add(batch); vectorStore.add(batch);
logger.info("Added batch of {} documents to vector store (from {} to {})", batch.size(), i, end - 1); logger.info("Added batch of {} documents to vector store (from {} to {})", batch.size(), i, end - 1);
} }
//vectorStore.add(splitDocs); // vectorStore.add(splitDocs);
}); });
logger.info("All documents embedded and added to vector store successfully"); logger.info("All documents embedded and added to vector store successfully");
}catch (Exception e){
logger.error("Error while solvingStep: "+e.getMessage(), e);
e.printStackTrace();
}
logger.info("Setting next step id: {}", this.step.getNextStepId()); logger.info("Setting next step id: {}", this.step.getNextStepId());
this.scenarioExecution.setNextStepId(this.step.getNextStepId()); this.scenarioExecution.setNextStepId(this.step.getNextStepId());