Update file system directory
This commit is contained in:
@@ -28,8 +28,6 @@ import com.olympus.model.apollo.KSIngestionInfo;
|
||||
@Service
|
||||
public class FileService {
|
||||
|
||||
private static final String UPLOAD_DIR = "C:\\mnt\\hermione_storage\\documents\\file_input_scenarios\\";
|
||||
|
||||
private Logger logger = LoggerFactory.getLogger(FileService.class);
|
||||
|
||||
@Value("${file.upload-dir}")
|
||||
|
||||
@@ -22,43 +22,48 @@ public class EmbeddingDocTempSolver extends StepSolver {
|
||||
|
||||
|
||||
|
||||
Logger logger = (Logger) LoggerFactory.getLogger(BasicQueryRagSolver.class);
|
||||
Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class);
|
||||
|
||||
private void loadParameters(){
|
||||
logger.info("Loading parameters");
|
||||
this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());
|
||||
logger.info("Scenario Execution ID: "+this.scenarioExecution.getId());
|
||||
logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId());
|
||||
|
||||
AttributeParser attributeParser = new AttributeParser(this.scenarioExecution);
|
||||
|
||||
this.scenario_execution_id = attributeParser.parse((String) this.scenarioExecution.getId());
|
||||
|
||||
this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file"));
|
||||
logger.info("Parsed path_file: {}", this.path_file);
|
||||
|
||||
if(this.step.getAttributes().containsKey("default_chunk_size")){
|
||||
this.default_chunk_size = (int) this.step.getAttributes().get("default_chunk_size");
|
||||
logger.info("Parsed default_chunk_size from attributes: {}", this.default_chunk_size);
|
||||
}else{
|
||||
this.default_chunk_size = 8000;
|
||||
logger.info("default_chunk_size not found in attributes, using default: 8000");
|
||||
}
|
||||
|
||||
if(this.step.getAttributes().containsKey("min_chunk_size")){
|
||||
this.min_chunk_size = (int) this.step.getAttributes().get("min_chunk_size");
|
||||
}else{
|
||||
this.min_chunk_size = 50;
|
||||
logger.info("min_chunk_size not found in attributes, using default: 50");
|
||||
}
|
||||
|
||||
if(this.step.getAttributes().containsKey("min_chunk_length_to_embed")){
|
||||
this.min_chunk_length_to_embed = (int) this.step.getAttributes().get("min_chunk_length_to_embed");
|
||||
}else{
|
||||
this.min_chunk_length_to_embed = 50;
|
||||
logger.info("min_chunk_length_to_embed not found in attributes, using default: 50");
|
||||
}
|
||||
|
||||
if(this.step.getAttributes().containsKey("max_num_chunks")){
|
||||
this.max_num_chunks = (int) this.step.getAttributes().get("max_num_chunks");
|
||||
}else{
|
||||
this.max_num_chunks = 1000;
|
||||
logger.info("max_num_chunks not found in attributes, using default: 1000");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -67,45 +72,54 @@ public class EmbeddingDocTempSolver extends StepSolver {
|
||||
try{
|
||||
logger.info("Solving step: " + this.step.getName());
|
||||
this.scenarioExecution.setCurrentStepId(this.step.getStepId());
|
||||
logger.info("Loading parameters for step: {}", this.step.getName());
|
||||
loadParameters();
|
||||
|
||||
logger.info("Embedding documents");
|
||||
File file = new File(this.path_file);
|
||||
logger.info("Reading file from path: {}", this.path_file);
|
||||
Tika tika = new Tika();
|
||||
tika.setMaxStringLength(-1);
|
||||
String text = tika.parseToString(file);
|
||||
logger.info("File read successfully. Length: {} characters", text.length());
|
||||
Document myDoc = new Document(text);
|
||||
|
||||
List<Document> docs = Collections.singletonList(myDoc);
|
||||
|
||||
logger.info("Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}",
|
||||
this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks);
|
||||
TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size,
|
||||
this.min_chunk_size,
|
||||
this.min_chunk_length_to_embed,
|
||||
this.max_num_chunks,
|
||||
true);
|
||||
|
||||
|
||||
logger.info("Splitting and embedding documents");
|
||||
docs.forEach(doc -> {
|
||||
List<Document> splitDocs = splitter.split(doc);
|
||||
Integer docIndex = 0;
|
||||
logger.info("Number of documents: " + splitDocs.size());
|
||||
logger.info("Number of split documents: {}", splitDocs.size());
|
||||
|
||||
for (Document splitDoc : splitDocs) {
|
||||
splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id);
|
||||
splitDoc.getMetadata().put("KsDocumentIndex",docIndex.toString());
|
||||
logger.info("DOC INDEX: ", docIndex);
|
||||
splitDoc.getMetadata().put("KsDoctype", "temp");
|
||||
logger.info("Adding split document with index {} to vector store", docIndex);
|
||||
docIndex++;
|
||||
}
|
||||
logger.info("Adding {} split documents to vector store", splitDocs.size());
|
||||
vectorStore.add(splitDocs);
|
||||
});
|
||||
logger.info("All documents embedded and added to vector store successfully");
|
||||
|
||||
}catch (Exception e){
|
||||
logger.error("Error while solvingStep: "+e.getMessage());
|
||||
logger.error("Error while solvingStep: "+e.getMessage(), e);
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
logger.info("Setting next step id: {}", this.step.getNextStepId());
|
||||
this.scenarioExecution.setNextStepId(this.step.getNextStepId());
|
||||
|
||||
logger.info("Returning scenario execution for step: {}", this.step.getName());
|
||||
return this.scenarioExecution;
|
||||
}
|
||||
|
||||
|
||||
@@ -56,10 +56,6 @@ eureka.instance.preferIpAddress: true
|
||||
|
||||
hermione.fe.url = http://localhost:5173
|
||||
|
||||
java-parser-module.url: http://java-parser-module-service.olympus.svc.cluster.local:8080
|
||||
java-re-module.url: http://java-re-module-service.olympus.svc.cluster.local:8080
|
||||
jsp-parser-module.url: http://jsp-parser-module-service.olympus.svc.cluster.local:8080
|
||||
|
||||
spring.ai.vectorstore.chroma.client.host=http://108.142.74.161
|
||||
spring.ai.vectorstore.chroma.client.port=8000
|
||||
spring.ai.vectorstore.chroma.client.key-token=tKAJfN1Yv5lP7pKorJHGfHMQhNEcM9uu
|
||||
@@ -70,7 +66,7 @@ spring.servlet.multipart.max-file-size=10MB
|
||||
spring.servlet.multipart.max-request-size=10MB
|
||||
|
||||
file.upload-dir=/mnt/hermione_storage/documents/file_input_scenarios/
|
||||
|
||||
# file.upload-dir=C:\\mnt\\hermione_storage\\documents\\file_input_scenarios\\
|
||||
|
||||
|
||||
generic-file-parser-module.url=http://generic-file-parser-module-service.olympus.svc.cluster.local:8080
|
||||
|
||||
Reference in New Issue
Block a user