Merged PR 62: Develop Q&A on Doc scenario

Develop Q&A on Doc scenario
This commit is contained in:
2025-02-21 09:03:46 +00:00
3 changed files with 189 additions and 0 deletions

View File

@@ -49,6 +49,8 @@ import com.olympus.hermione.stepSolvers.AdvancedAIPromptSolver;
import com.olympus.hermione.stepSolvers.SummarizeDocSolver;
import com.olympus.hermione.stepSolvers.BasicAIPromptSolver;
import com.olympus.hermione.stepSolvers.BasicQueryRagSolver;
import com.olympus.hermione.stepSolvers.DeleteDocTempSolver;
import com.olympus.hermione.stepSolvers.EmbeddingDocTempSolver;
import com.olympus.hermione.stepSolvers.QueryNeo4JSolver;
import com.olympus.hermione.stepSolvers.SourceCodeRagSolver;
import com.olympus.hermione.stepSolvers.StepSolver;
@@ -267,6 +269,12 @@ public class ScenarioExecutionService {
case "OLYMPUS_QUERY_AI":
solver = new OlynmpusChatClientSolver();
break;
case "EMBED_TEMPORARY_DOC":
solver = new EmbeddingDocTempSolver();
break;
case "DELETE_TEMPORARY_DOC":
solver = new DeleteDocTempSolver();
break;
default:
break;
}

View File

@@ -0,0 +1,75 @@
package com.olympus.hermione.stepSolvers;
import ch.qos.logback.classic.Logger;
import com.olympus.hermione.models.ScenarioExecution;
import com.olympus.hermione.utility.AttributeParser;
import java.util.List;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.SearchRequest;
/**
 * Step solver that deletes documents from the vector store.
 *
 * <p>The solver runs a similarity search built from the step attributes
 * ({@code rag_filter}, {@code rag_query}, {@code rag_topk}, {@code rag_threshold}),
 * collects the ids of the returned documents and deletes them.
 *
 * <p>Supported step attributes:
 * <ul>
 *   <li>{@code rag_filter} - filter expression, resolved through {@link AttributeParser};</li>
 *   <li>{@code rag_query} - search query, defaults to {@code "*"};</li>
 *   <li>{@code rag_topk} - maximum number of documents fetched (and therefore deleted)
 *       in one run, defaults to {@code 1000};</li>
 *   <li>{@code rag_threshold} - similarity threshold, defaults to {@code 0.0}.</li>
 * </ul>
 */
public class DeleteDocTempSolver extends StepSolver {

    private static final String DEFAULT_QUERY = "*";
    private static final int DEFAULT_TOP_K = 1000;
    private static final double DEFAULT_THRESHOLD = 0.0;

    private String rag_filter;
    private int topk;
    private double threshold;
    private String query;

    // Bound to this class so log lines are attributed to the delete step.
    Logger logger = (Logger) LoggerFactory.getLogger(DeleteDocTempSolver.class);

    /**
     * Reads the step attributes into the solver fields and publishes the scenario
     * execution id in the shared map under {@code scenario_execution_id}, so that
     * placeholders in {@code rag_filter} can refer to it.
     */
    private void loadParameters() {
        logger.info("Loading parameters");

        this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());

        AttributeParser attributeParser = new AttributeParser(this.scenarioExecution);
        this.rag_filter = attributeParser.parse((String) this.step.getAttributes().get("rag_filter"));

        Object rawQuery = this.step.getAttributes().get("rag_query");
        this.query = rawQuery != null ? rawQuery.toString() : DEFAULT_QUERY;

        this.topk = intAttribute("rag_topk", DEFAULT_TOP_K);
        this.threshold = doubleAttribute("rag_threshold", DEFAULT_THRESHOLD);
    }

    /**
     * Returns the step attribute {@code key} as an {@code int}.
     *
     * <p>Any {@link Number} is accepted (a plain {@code (int)} cast would fail with a
     * {@link ClassCastException} for e.g. {@code Long} or {@code Double} values), and
     * textual values are parsed.
     *
     * @param key attribute name
     * @param defaultValue value used when the attribute is absent or {@code null}
     * @return the attribute value, or {@code defaultValue}
     * @throws NumberFormatException if the attribute is a non-numeric string
     */
    private int intAttribute(String key, int defaultValue) {
        Object raw = this.step.getAttributes().get(key);
        if (raw == null) {
            return defaultValue;
        }
        if (raw instanceof Number number) {
            return number.intValue();
        }
        return Integer.parseInt(raw.toString().trim());
    }

    /**
     * Returns the step attribute {@code key} as a {@code double}.
     *
     * <p>Any {@link Number} is accepted, so an integral value such as {@code 0}
     * no longer causes a {@link ClassCastException}; textual values are parsed.
     *
     * @param key attribute name
     * @param defaultValue value used when the attribute is absent or {@code null}
     * @return the attribute value, or {@code defaultValue}
     * @throws NumberFormatException if the attribute is a non-numeric string
     */
    private double doubleAttribute(String key, double defaultValue) {
        Object raw = this.step.getAttributes().get(key);
        if (raw == null) {
            return defaultValue;
        }
        if (raw instanceof Number number) {
            return number.doubleValue();
        }
        return Double.parseDouble(raw.toString().trim());
    }

    /**
     * Searches the vector store with the loaded parameters and deletes every
     * document returned by the search.
     */
    private void deleteMatchingDocuments() {
        SearchRequest searchRequest = SearchRequest.defaults()
                .withQuery(this.query)
                .withTopK(this.topk)
                .withSimilarityThreshold(this.threshold)
                .withFilterExpression(this.rag_filter);

        List<Document> docs = vectorStore.similaritySearch(searchRequest);
        List<String> ids = docs.stream().map(Document::getId).toList();

        if (ids.isEmpty()) {
            // Nothing matched: avoid sending an empty delete request to the store.
            logger.info("No documents matched filter '{}', nothing to delete", this.rag_filter);
            return;
        }

        if (ids.size() >= this.topk) {
            // The search is capped at topk results, so more matching documents may still exist.
            logger.warn("Search returned {} documents (topk={}); some matching documents may remain",
                    ids.size(), this.topk);
        }

        vectorStore.delete(ids);
        logger.info("Deleted {} documents", ids.size());
    }

    /**
     * Executes the delete step and advances the scenario to the next step.
     *
     * <p>When {@code rag_filter} resolves to {@code null} or blank, nothing is deleted:
     * NOTE(review): an absent filter presumably makes the search match documents
     * regardless of scenario, which would delete unrelated data - confirm against
     * the vector store in use.
     *
     * @return the scenario execution with current and next step ids updated
     */
    @Override
    public ScenarioExecution solveStep() {
        logger.info("Solving step: {}", this.step.getName());

        this.scenarioExecution.setCurrentStepId(this.step.getStepId());

        loadParameters();

        if (this.rag_filter == null || this.rag_filter.isBlank()) {
            logger.error("Step '{}' has no rag_filter: skipping deletion to avoid removing unrelated documents",
                    this.step.getName());
        } else {
            deleteMatchingDocuments();
        }

        this.scenarioExecution.setNextStepId(this.step.getNextStepId());

        return this.scenarioExecution;
    }
}

View File

@@ -0,0 +1,106 @@
package com.olympus.hermione.stepSolvers;
import ch.qos.logback.classic.Logger;
import com.olympus.hermione.models.ScenarioExecution;
import com.olympus.hermione.utility.AttributeParser;
import java.io.File;
import java.util.Collections;
import java.util.List;
import org.apache.tika.Tika;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
/**
 * Step solver that embeds a file into the vector store.
 *
 * <p>The file at {@code path_file} is converted to text with Tika, split into chunks
 * with a {@link TokenTextSplitter}, and each chunk is tagged with the metadata entry
 * {@code KsScenarioExecutionId} (the id of the current scenario execution) before
 * being added to the vector store.
 *
 * <p>Supported step attributes:
 * <ul>
 *   <li>{@code path_file} - path of the file to embed, resolved through {@link AttributeParser};</li>
 *   <li>{@code default_chunk_size} - defaults to {@code 8000};</li>
 *   <li>{@code min_chunk_size} - defaults to {@code 50};</li>
 *   <li>{@code min_chunk_length_to_embed} - defaults to {@code 50};</li>
 *   <li>{@code max_num_chunks} - defaults to {@code 1000}.</li>
 * </ul>
 */
public class EmbeddingDocTempSolver extends StepSolver {

    private static final int DEFAULT_CHUNK_SIZE = 8000;
    private static final int DEFAULT_MIN_CHUNK_SIZE = 50;
    private static final int DEFAULT_MIN_CHUNK_LENGTH_TO_EMBED = 50;
    private static final int DEFAULT_MAX_NUM_CHUNKS = 1000;

    private String scenario_execution_id;
    private String path_file;
    private int default_chunk_size;
    private int min_chunk_size;
    private int min_chunk_length_to_embed;
    private int max_num_chunks;

    // Bound to this class so log lines are attributed to the embedding step.
    Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class);

    /**
     * Reads the step attributes into the solver fields and publishes the scenario
     * execution id in the shared map under {@code scenario_execution_id}.
     */
    private void loadParameters() {
        logger.info("Loading parameters");

        this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());
        logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId());

        AttributeParser attributeParser = new AttributeParser(this.scenarioExecution);

        this.scenario_execution_id = attributeParser.parse((String) this.scenarioExecution.getId());
        this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file"));

        this.default_chunk_size = intAttribute("default_chunk_size", DEFAULT_CHUNK_SIZE);
        this.min_chunk_size = intAttribute("min_chunk_size", DEFAULT_MIN_CHUNK_SIZE);
        this.min_chunk_length_to_embed = intAttribute("min_chunk_length_to_embed", DEFAULT_MIN_CHUNK_LENGTH_TO_EMBED);
        this.max_num_chunks = intAttribute("max_num_chunks", DEFAULT_MAX_NUM_CHUNKS);
    }

    /**
     * Returns the step attribute {@code key} as an {@code int}.
     *
     * <p>Any {@link Number} is accepted (a plain {@code (int)} cast would fail with a
     * {@link ClassCastException} for e.g. {@code Long} or {@code Double} values), and
     * textual values are parsed.
     *
     * @param key attribute name
     * @param defaultValue value used when the attribute is absent or {@code null}
     * @return the attribute value, or {@code defaultValue}
     * @throws NumberFormatException if the attribute is a non-numeric string
     */
    private int intAttribute(String key, int defaultValue) {
        Object raw = this.step.getAttributes().get(key);
        if (raw == null) {
            return defaultValue;
        }
        if (raw instanceof Number number) {
            return number.intValue();
        }
        return Integer.parseInt(raw.toString().trim());
    }

    /**
     * Executes the embedding step and advances the scenario to the next step.
     *
     * <p>Failures (unreadable file, parsing or vector store errors, invalid attributes)
     * are logged with their stack trace and do not interrupt the scenario: the next
     * step id is set in every case.
     *
     * @return the scenario execution with the next step id set
     */
    @Override
    public ScenarioExecution solveStep() {
        try {
            logger.info("Solving step: {}", this.step.getName());

            this.scenarioExecution.setCurrentStepId(this.step.getStepId());

            loadParameters();

            logger.info("Embedding documents");

            File file = new File(this.path_file);
            Tika tika = new Tika();
            // -1 removes Tika's limit on the length of the extracted text.
            tika.setMaxStringLength(-1);
            String text = tika.parseToString(file);

            TokenTextSplitter splitter = new TokenTextSplitter(
                    this.default_chunk_size,
                    this.min_chunk_size,
                    this.min_chunk_length_to_embed,
                    this.max_num_chunks,
                    true);

            List<Document> splitDocs = splitter.split(new Document(text));
            logger.info("Number of documents: {}", splitDocs.size());

            // Tag every chunk with the execution id so the chunks of this run can be
            // selected later through a metadata filter on KsScenarioExecutionId.
            for (Document splitDoc : splitDocs) {
                splitDoc.getMetadata().put("KsScenarioExecutionId", this.scenario_execution_id);
            }

            if (splitDocs.isEmpty()) {
                logger.warn("No chunks produced from file '{}', nothing to embed", this.path_file);
            } else {
                vectorStore.add(splitDocs);
            }
        } catch (Exception e) {
            // Passing the exception as last argument makes the logger record the stack trace.
            logger.error("Error while solvingStep: {}", e.getMessage(), e);
        }

        this.scenarioExecution.setNextStepId(this.step.getNextStepId());

        return this.scenarioExecution;
    }
}