Delete try/catch block in step
This commit is contained in:
@@ -4,15 +4,17 @@ import ch.qos.logback.classic.Logger;
|
|||||||
import com.olympus.hermione.models.ScenarioExecution;
|
import com.olympus.hermione.models.ScenarioExecution;
|
||||||
import com.olympus.hermione.utility.AttributeParser;
|
import com.olympus.hermione.utility.AttributeParser;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.tika.Tika;
|
import org.apache.tika.Tika;
|
||||||
|
import org.apache.tika.exception.TikaException;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.ai.document.Document;
|
import org.springframework.ai.document.Document;
|
||||||
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
||||||
|
|
||||||
public class EmbeddingDocTempSolver extends StepSolver {
|
public class EmbeddingDocTempSolver extends StepSolver {
|
||||||
|
|
||||||
private String scenario_execution_id;
|
private String scenario_execution_id;
|
||||||
private String path_file;
|
private String path_file;
|
||||||
private int default_chunk_size;
|
private int default_chunk_size;
|
||||||
@@ -20,11 +22,9 @@ public class EmbeddingDocTempSolver extends StepSolver {
|
|||||||
private int min_chunk_length_to_embed;
|
private int min_chunk_length_to_embed;
|
||||||
private int max_num_chunks;
|
private int max_num_chunks;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class);
|
Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class);
|
||||||
|
|
||||||
private void loadParameters(){
|
private void loadParameters() {
|
||||||
logger.info("Loading parameters");
|
logger.info("Loading parameters");
|
||||||
this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());
|
this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());
|
||||||
logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId());
|
logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId());
|
||||||
@@ -36,77 +36,83 @@ public class EmbeddingDocTempSolver extends StepSolver {
|
|||||||
this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file"));
|
this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file"));
|
||||||
logger.info("Parsed path_file: {}", this.path_file);
|
logger.info("Parsed path_file: {}", this.path_file);
|
||||||
|
|
||||||
if(this.step.getAttributes().containsKey("default_chunk_size")){
|
if (this.step.getAttributes().containsKey("default_chunk_size")) {
|
||||||
this.default_chunk_size = (int) this.step.getAttributes().get("default_chunk_size");
|
this.default_chunk_size = (int) this.step.getAttributes().get("default_chunk_size");
|
||||||
logger.info("Parsed default_chunk_size from attributes: {}", this.default_chunk_size);
|
logger.info("Parsed default_chunk_size from attributes: {}", this.default_chunk_size);
|
||||||
}else{
|
} else {
|
||||||
this.default_chunk_size = 8000;
|
this.default_chunk_size = 8000;
|
||||||
logger.info("default_chunk_size not found in attributes, using default: 8000");
|
logger.info("default_chunk_size not found in attributes, using default: 8000");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(this.step.getAttributes().containsKey("min_chunk_size")){
|
if (this.step.getAttributes().containsKey("min_chunk_size")) {
|
||||||
this.min_chunk_size = (int) this.step.getAttributes().get("min_chunk_size");
|
this.min_chunk_size = (int) this.step.getAttributes().get("min_chunk_size");
|
||||||
}else{
|
} else {
|
||||||
this.min_chunk_size = 50;
|
this.min_chunk_size = 50;
|
||||||
logger.info("min_chunk_size not found in attributes, using default: 50");
|
logger.info("min_chunk_size not found in attributes, using default: 50");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(this.step.getAttributes().containsKey("min_chunk_length_to_embed")){
|
if (this.step.getAttributes().containsKey("min_chunk_length_to_embed")) {
|
||||||
this.min_chunk_length_to_embed = (int) this.step.getAttributes().get("min_chunk_length_to_embed");
|
this.min_chunk_length_to_embed = (int) this.step.getAttributes().get("min_chunk_length_to_embed");
|
||||||
}else{
|
} else {
|
||||||
this.min_chunk_length_to_embed = 50;
|
this.min_chunk_length_to_embed = 50;
|
||||||
logger.info("min_chunk_length_to_embed not found in attributes, using default: 50");
|
logger.info("min_chunk_length_to_embed not found in attributes, using default: 50");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(this.step.getAttributes().containsKey("max_num_chunks")){
|
if (this.step.getAttributes().containsKey("max_num_chunks")) {
|
||||||
this.max_num_chunks = (int) this.step.getAttributes().get("max_num_chunks");
|
this.max_num_chunks = (int) this.step.getAttributes().get("max_num_chunks");
|
||||||
}else{
|
} else {
|
||||||
this.max_num_chunks = 1000;
|
this.max_num_chunks = 1000;
|
||||||
logger.info("max_num_chunks not found in attributes, using default: 1000");
|
logger.info("max_num_chunks not found in attributes, using default: 1000");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ScenarioExecution solveStep(){
|
public ScenarioExecution solveStep() {
|
||||||
|
|
||||||
try{
|
logger.info("Solving step: " + this.step.getName());
|
||||||
logger.info("Solving step: " + this.step.getName());
|
this.scenarioExecution.setCurrentStepId(this.step.getStepId());
|
||||||
this.scenarioExecution.setCurrentStepId(this.step.getStepId());
|
logger.info("Loading parameters for step: {}", this.step.getName());
|
||||||
logger.info("Loading parameters for step: {}", this.step.getName());
|
loadParameters();
|
||||||
loadParameters();
|
logger.info("Embedding documents");
|
||||||
logger.info("Embedding documents");
|
File file = new File(this.path_file);
|
||||||
File file = new File(this.path_file);
|
logger.info("Reading file from path: {}", this.path_file);
|
||||||
logger.info("Reading file from path: {}", this.path_file);
|
Tika tika = new Tika();
|
||||||
Tika tika = new Tika();
|
tika.setMaxStringLength(-1);
|
||||||
tika.setMaxStringLength(-1);
|
String text;
|
||||||
String text = tika.parseToString(file);
|
try {
|
||||||
logger.info("File read successfully. Length: {} characters", text.length());
|
text = tika.parseToString(file);
|
||||||
Document myDoc = new Document(text);
|
} catch (IOException | TikaException e) {
|
||||||
|
logger.error("Error parsing file: ", e);
|
||||||
|
throw new RuntimeException("Error parsing file", e);
|
||||||
|
}
|
||||||
|
logger.info("File read successfully. Length: {} characters", text.length());
|
||||||
|
Document myDoc = new Document(text);
|
||||||
|
|
||||||
List<Document> docs = Collections.singletonList(myDoc);
|
List<Document> docs = Collections.singletonList(myDoc);
|
||||||
|
|
||||||
logger.info("Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}",
|
logger.info(
|
||||||
|
"Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}",
|
||||||
this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks);
|
this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks);
|
||||||
TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size,
|
TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size,
|
||||||
this.min_chunk_size,
|
this.min_chunk_size,
|
||||||
this.min_chunk_length_to_embed,
|
this.min_chunk_length_to_embed,
|
||||||
this.max_num_chunks,
|
this.max_num_chunks,
|
||||||
true);
|
true);
|
||||||
|
|
||||||
logger.info("Splitting and embedding documents");
|
logger.info("Splitting and embedding documents");
|
||||||
docs.forEach(doc -> {
|
docs.forEach(doc -> {
|
||||||
List<Document> splitDocs = splitter.split(doc);
|
List<Document> splitDocs = splitter.split(doc);
|
||||||
Integer docIndex = 0;
|
Integer docIndex = 0;
|
||||||
logger.info("Number of split documents: {}", splitDocs.size());
|
logger.info("Number of split documents: {}", splitDocs.size());
|
||||||
|
|
||||||
for (Document splitDoc : splitDocs) {
|
for (Document splitDoc : splitDocs) {
|
||||||
splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id);
|
splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id);
|
||||||
splitDoc.getMetadata().put("KsDocumentIndex",docIndex.toString());
|
splitDoc.getMetadata().put("KsDocumentIndex", docIndex.toString());
|
||||||
splitDoc.getMetadata().put("KsDoctype", "temp");
|
splitDoc.getMetadata().put("KsDoctype", "temp");
|
||||||
logger.info("Adding split document with index {} to vector store", docIndex);
|
logger.info("Adding split document with index {} to vector store", docIndex);
|
||||||
docIndex++;
|
docIndex++;
|
||||||
}
|
}
|
||||||
logger.info("Adding {} split documents to vector store", splitDocs.size());
|
logger.info("Adding {} split documents to vector store", splitDocs.size());
|
||||||
|
|
||||||
// Carica un massimo di 10 documenti per volta
|
// Carica un massimo di 10 documenti per volta
|
||||||
int batchSize = 10;
|
int batchSize = 10;
|
||||||
@@ -116,14 +122,9 @@ public class EmbeddingDocTempSolver extends StepSolver {
|
|||||||
vectorStore.add(batch);
|
vectorStore.add(batch);
|
||||||
logger.info("Added batch of {} documents to vector store (from {} to {})", batch.size(), i, end - 1);
|
logger.info("Added batch of {} documents to vector store (from {} to {})", batch.size(), i, end - 1);
|
||||||
}
|
}
|
||||||
//vectorStore.add(splitDocs);
|
// vectorStore.add(splitDocs);
|
||||||
});
|
});
|
||||||
logger.info("All documents embedded and added to vector store successfully");
|
logger.info("All documents embedded and added to vector store successfully");
|
||||||
|
|
||||||
}catch (Exception e){
|
|
||||||
logger.error("Error while solvingStep: "+e.getMessage(), e);
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info("Setting next step id: {}", this.step.getNextStepId());
|
logger.info("Setting next step id: {}", this.step.getNextStepId());
|
||||||
this.scenarioExecution.setNextStepId(this.step.getNextStepId());
|
this.scenarioExecution.setNextStepId(this.step.getNextStepId());
|
||||||
|
|||||||
Reference in New Issue
Block a user