Develop Q&A on Doc scenario
This commit is contained in:
@@ -49,6 +49,8 @@ import com.olympus.hermione.stepSolvers.AdvancedAIPromptSolver;
|
|||||||
import com.olympus.hermione.stepSolvers.SummarizeDocSolver;
|
import com.olympus.hermione.stepSolvers.SummarizeDocSolver;
|
||||||
import com.olympus.hermione.stepSolvers.BasicAIPromptSolver;
|
import com.olympus.hermione.stepSolvers.BasicAIPromptSolver;
|
||||||
import com.olympus.hermione.stepSolvers.BasicQueryRagSolver;
|
import com.olympus.hermione.stepSolvers.BasicQueryRagSolver;
|
||||||
|
import com.olympus.hermione.stepSolvers.DeleteDocTempSolver;
|
||||||
|
import com.olympus.hermione.stepSolvers.EmbeddingDocTempSolver;
|
||||||
import com.olympus.hermione.stepSolvers.QueryNeo4JSolver;
|
import com.olympus.hermione.stepSolvers.QueryNeo4JSolver;
|
||||||
import com.olympus.hermione.stepSolvers.SourceCodeRagSolver;
|
import com.olympus.hermione.stepSolvers.SourceCodeRagSolver;
|
||||||
import com.olympus.hermione.stepSolvers.StepSolver;
|
import com.olympus.hermione.stepSolvers.StepSolver;
|
||||||
@@ -267,6 +269,12 @@ public class ScenarioExecutionService {
|
|||||||
case "OLYMPUS_QUERY_AI":
|
case "OLYMPUS_QUERY_AI":
|
||||||
solver = new OlynmpusChatClientSolver();
|
solver = new OlynmpusChatClientSolver();
|
||||||
break;
|
break;
|
||||||
|
case "EMBED_TEMPORARY_DOC":
|
||||||
|
solver = new EmbeddingDocTempSolver();
|
||||||
|
break;
|
||||||
|
case "DELETE_TEMPORARY_DOC":
|
||||||
|
solver = new DeleteDocTempSolver();
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,75 @@
|
|||||||
|
package com.olympus.hermione.stepSolvers;
|
||||||
|
|
||||||
|
import ch.qos.logback.classic.Logger;
|
||||||
|
import com.olympus.hermione.models.ScenarioExecution;
|
||||||
|
import com.olympus.hermione.utility.AttributeParser;
|
||||||
|
import java.util.List;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.ai.document.Document;
|
||||||
|
import org.springframework.ai.vectorstore.SearchRequest;
|
||||||
|
|
||||||
|
public class DeleteDocTempSolver extends StepSolver {
|
||||||
|
|
||||||
|
private String rag_filter;
|
||||||
|
private int topk;
|
||||||
|
private double threshold;
|
||||||
|
private String query;
|
||||||
|
|
||||||
|
|
||||||
|
Logger logger = (Logger) LoggerFactory.getLogger(BasicQueryRagSolver.class);
|
||||||
|
|
||||||
|
private void loadParameters(){
|
||||||
|
logger.info("Loading parameters");
|
||||||
|
this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());
|
||||||
|
|
||||||
|
AttributeParser attributeParser = new AttributeParser(this.scenarioExecution);
|
||||||
|
|
||||||
|
// this.scenario_execution_id = attributeParser.parse((String) this.scenarioExecution.getId());
|
||||||
|
|
||||||
|
this.rag_filter = attributeParser.parse((String) this.step.getAttributes().get("rag_filter"));
|
||||||
|
|
||||||
|
if(this.step.getAttributes().containsKey("rag_query")){
|
||||||
|
this.query = (String) this.step.getAttributes().get("rag_query");
|
||||||
|
}else{
|
||||||
|
this.query = "*";
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.step.getAttributes().containsKey("rag_topk")){
|
||||||
|
this.topk = (int) this.step.getAttributes().get("rag_topk");
|
||||||
|
}else{
|
||||||
|
this.topk = 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.step.getAttributes().containsKey("rag_threshold")){
|
||||||
|
this.threshold = (double) this.step.getAttributes().get("rag_threshold");
|
||||||
|
}else{
|
||||||
|
this.threshold = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScenarioExecution solveStep(){
|
||||||
|
|
||||||
|
System.out.println("Solving step: " + this.step.getName());
|
||||||
|
|
||||||
|
this.scenarioExecution.setCurrentStepId(this.step.getStepId());
|
||||||
|
|
||||||
|
loadParameters();
|
||||||
|
SearchRequest searchRequest = SearchRequest.defaults()
|
||||||
|
.withQuery(this.query)
|
||||||
|
.withTopK(this.topk)
|
||||||
|
.withSimilarityThreshold(this.threshold)
|
||||||
|
.withFilterExpression(this.rag_filter);
|
||||||
|
|
||||||
|
|
||||||
|
List<Document> docs = vectorStore.similaritySearch(searchRequest);
|
||||||
|
List<String> ids = docs.stream().map(Document::getId).toList();
|
||||||
|
vectorStore.delete(ids);
|
||||||
|
|
||||||
|
this.scenarioExecution.setNextStepId(this.step.getNextStepId());
|
||||||
|
|
||||||
|
return this.scenarioExecution;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,106 @@
|
|||||||
|
package com.olympus.hermione.stepSolvers;
|
||||||
|
|
||||||
|
import ch.qos.logback.classic.Logger;
|
||||||
|
import com.olympus.hermione.models.ScenarioExecution;
|
||||||
|
import com.olympus.hermione.utility.AttributeParser;
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.tika.Tika;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.ai.document.Document;
|
||||||
|
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
||||||
|
|
||||||
|
public class EmbeddingDocTempSolver extends StepSolver {
|
||||||
|
|
||||||
|
private String scenario_execution_id;
|
||||||
|
private String path_file;
|
||||||
|
private int default_chunk_size;
|
||||||
|
private int min_chunk_size;
|
||||||
|
private int min_chunk_length_to_embed;
|
||||||
|
private int max_num_chunks;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Logger logger = (Logger) LoggerFactory.getLogger(BasicQueryRagSolver.class);
|
||||||
|
|
||||||
|
private void loadParameters(){
|
||||||
|
logger.info("Loading parameters");
|
||||||
|
this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());
|
||||||
|
logger.info("Scenario Execution ID: "+this.scenarioExecution.getId());
|
||||||
|
|
||||||
|
AttributeParser attributeParser = new AttributeParser(this.scenarioExecution);
|
||||||
|
|
||||||
|
this.scenario_execution_id = attributeParser.parse((String) this.scenarioExecution.getId());
|
||||||
|
|
||||||
|
this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file"));
|
||||||
|
|
||||||
|
if(this.step.getAttributes().containsKey("default_chunk_size")){
|
||||||
|
this.default_chunk_size = (int) this.step.getAttributes().get("default_chunk_size");
|
||||||
|
}else{
|
||||||
|
this.default_chunk_size = 8000;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.step.getAttributes().containsKey("min_chunk_size")){
|
||||||
|
this.min_chunk_size = (int) this.step.getAttributes().get("min_chunk_size");
|
||||||
|
}else{
|
||||||
|
this.min_chunk_size = 50;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.step.getAttributes().containsKey("min_chunk_length_to_embed")){
|
||||||
|
this.min_chunk_length_to_embed = (int) this.step.getAttributes().get("min_chunk_length_to_embed");
|
||||||
|
}else{
|
||||||
|
this.min_chunk_length_to_embed = 50;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.step.getAttributes().containsKey("max_num_chunks")){
|
||||||
|
this.max_num_chunks = (int) this.step.getAttributes().get("max_num_chunks");
|
||||||
|
}else{
|
||||||
|
this.max_num_chunks = 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScenarioExecution solveStep(){
|
||||||
|
|
||||||
|
try{
|
||||||
|
logger.info("Solving step: " + this.step.getName());
|
||||||
|
this.scenarioExecution.setCurrentStepId(this.step.getStepId());
|
||||||
|
loadParameters();
|
||||||
|
|
||||||
|
logger.info("Embedding documents");
|
||||||
|
File file = new File(this.path_file);
|
||||||
|
Tika tika = new Tika();
|
||||||
|
tika.setMaxStringLength(-1);
|
||||||
|
String text = tika.parseToString(file);
|
||||||
|
Document myDoc = new Document(text);
|
||||||
|
|
||||||
|
List<Document> docs = Collections.singletonList(myDoc);
|
||||||
|
|
||||||
|
TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size,
|
||||||
|
this.min_chunk_size,
|
||||||
|
this.min_chunk_length_to_embed,
|
||||||
|
this.max_num_chunks,
|
||||||
|
true);
|
||||||
|
|
||||||
|
docs.forEach(doc -> {
|
||||||
|
List<Document> splitDocs = splitter.split(doc);
|
||||||
|
logger.info("Number of documents: " + splitDocs.size());
|
||||||
|
|
||||||
|
splitDocs.forEach(splitDoc -> {
|
||||||
|
splitDoc.getMetadata().put("KsScenarioExecutionId", this.scenario_execution_id);
|
||||||
|
});
|
||||||
|
vectorStore.add(splitDocs);
|
||||||
|
});
|
||||||
|
}catch (Exception e){
|
||||||
|
logger.error("Error while solvingStep: "+e.getMessage());
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
this.scenarioExecution.setNextStepId(this.step.getNextStepId());
|
||||||
|
|
||||||
|
return this.scenarioExecution;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user