Merged PR 141: Update file system directory

Update file system directory
This commit is contained in:
2025-05-28 12:42:25 +00:00
3 changed files with 23 additions and 15 deletions

View File

@@ -28,8 +28,6 @@ import com.olympus.model.apollo.KSIngestionInfo;
@Service
public class FileService {
private static final String UPLOAD_DIR = "C:\\mnt\\hermione_storage\\documents\\file_input_scenarios\\";
private Logger logger = LoggerFactory.getLogger(FileService.class);
@Value("${file.upload-dir}")

View File

@@ -22,43 +22,48 @@ public class EmbeddingDocTempSolver extends StepSolver {
Logger logger = (Logger) LoggerFactory.getLogger(BasicQueryRagSolver.class);
Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class);
/**
 * Loads the configuration of the current step into this solver's fields.
 *
 * <p>Stores the scenario execution id in the shared execution map under
 * {@code "scenario_execution_id"}, passes the execution id and the
 * {@code path_file} attribute through {@code AttributeParser.parse}, and reads
 * the four chunking parameters from the step attributes. Each chunking
 * parameter falls back to a built-in default when it is not supplied:
 * {@code default_chunk_size}=8000, {@code min_chunk_size}=50,
 * {@code min_chunk_length_to_embed}=50, {@code max_num_chunks}=1000.
 */
private void loadParameters(){
    logger.info("Loading parameters");

    this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());
    // Logged once, with a placeholder instead of string concatenation.
    logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId());

    AttributeParser attributeParser = new AttributeParser(this.scenarioExecution);
    this.scenario_execution_id = attributeParser.parse((String) this.scenarioExecution.getId());
    this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file"));
    logger.info("Parsed path_file: {}", this.path_file);

    this.default_chunk_size = readIntAttribute("default_chunk_size", 8000);
    this.min_chunk_size = readIntAttribute("min_chunk_size", 50);
    this.min_chunk_length_to_embed = readIntAttribute("min_chunk_length_to_embed", 50);
    this.max_num_chunks = readIntAttribute("max_num_chunks", 1000);
}

/**
 * Reads an integer-valued step attribute, falling back to a default.
 *
 * <p>A plain {@code (int)} cast on the attribute value only works when the
 * value is exactly an {@code Integer}; this helper also accepts any other
 * {@link Number} and numeric strings, and treats a key that is present with a
 * {@code null} value the same as a missing key.
 *
 * @param key          attribute name to look up in the step attributes
 * @param defaultValue value returned when the attribute is missing or null
 * @return the attribute converted to {@code int}, or {@code defaultValue}
 * @throws IllegalArgumentException if the value is neither a number nor a
 *         string that parses as an integer ({@link NumberFormatException} is
 *         a subclass and is thrown for unparsable strings)
 */
private int readIntAttribute(String key, int defaultValue){
    Object raw = this.step.getAttributes().get(key);
    if(raw == null){
        logger.info("{} not found in attributes, using default: {}", key, defaultValue);
        return defaultValue;
    }

    int value;
    if(raw instanceof Number){
        value = ((Number) raw).intValue();
    }else if(raw instanceof String){
        value = Integer.parseInt(((String) raw).trim());
    }else{
        throw new IllegalArgumentException(
                "Attribute '" + key + "' must be an integer but was " + raw.getClass().getName());
    }
    logger.info("Parsed {} from attributes: {}", key, value);
    return value;
}
@Override
@@ -67,45 +72,54 @@ public class EmbeddingDocTempSolver extends StepSolver {
try{
logger.info("Solving step: " + this.step.getName());
this.scenarioExecution.setCurrentStepId(this.step.getStepId());
logger.info("Loading parameters for step: {}", this.step.getName());
loadParameters();
logger.info("Embedding documents");
File file = new File(this.path_file);
logger.info("Reading file from path: {}", this.path_file);
Tika tika = new Tika();
tika.setMaxStringLength(-1);
String text = tika.parseToString(file);
logger.info("File read successfully. Length: {} characters", text.length());
Document myDoc = new Document(text);
List<Document> docs = Collections.singletonList(myDoc);
logger.info("Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}",
this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks);
TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size,
this.min_chunk_size,
this.min_chunk_length_to_embed,
this.max_num_chunks,
true);
logger.info("Splitting and embedding documents");
docs.forEach(doc -> {
List<Document> splitDocs = splitter.split(doc);
Integer docIndex = 0;
logger.info("Number of documents: " + splitDocs.size());
logger.info("Number of split documents: {}", splitDocs.size());
for (Document splitDoc : splitDocs) {
splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id);
splitDoc.getMetadata().put("KsDocumentIndex",docIndex.toString());
logger.info("DOC INDEX: ", docIndex);
splitDoc.getMetadata().put("KsDoctype", "temp");
logger.info("Adding split document with index {} to vector store", docIndex);
docIndex++;
}
logger.info("Adding {} split documents to vector store", splitDocs.size());
vectorStore.add(splitDocs);
});
logger.info("All documents embedded and added to vector store successfully");
}catch (Exception e){
logger.error("Error while solvingStep: "+e.getMessage());
logger.error("Error while solvingStep: "+e.getMessage(), e);
e.printStackTrace();
}
logger.info("Setting next step id: {}", this.step.getNextStepId());
this.scenarioExecution.setNextStepId(this.step.getNextStepId());
logger.info("Returning scenario execution for step: {}", this.step.getName());
return this.scenarioExecution;
}

View File

@@ -56,10 +56,6 @@ eureka.instance.preferIpAddress: true
hermione.fe.url = http://localhost:5173
java-parser-module.url: http://java-parser-module-service.olympus.svc.cluster.local:8080
java-re-module.url: http://java-re-module-service.olympus.svc.cluster.local:8080
jsp-parser-module.url: http://jsp-parser-module-service.olympus.svc.cluster.local:8080
spring.ai.vectorstore.chroma.client.host=http://108.142.74.161
spring.ai.vectorstore.chroma.client.port=8000
spring.ai.vectorstore.chroma.client.key-token=tKAJfN1Yv5lP7pKorJHGfHMQhNEcM9uu
@@ -70,7 +66,7 @@ spring.servlet.multipart.max-file-size=10MB
spring.servlet.multipart.max-request-size=10MB
file.upload-dir=/mnt/hermione_storage/documents/file_input_scenarios/
# file.upload-dir=C:\\mnt\\hermione_storage\\documents\\file_input_scenarios\\
generic-file-parser-module.url=http://generic-file-parser-module-service.olympus.svc.cluster.local:8080