update summarize

This commit is contained in:
Florinda
2025-02-25 10:57:05 +01:00
parent 042248367d
commit f43b7c2c7a
2 changed files with 49 additions and 14 deletions

View File

@@ -44,7 +44,7 @@ public class ExternalCodeGenieSolver extends StepSolver {
@Autowired @Autowired
private ScenarioExecutionRepository scenarioExecutionRepo; private ScenarioExecutionRepository scenarioExecutionRepo;
Logger logger = (Logger) LoggerFactory.getLogger(BasicQueryRagSolver.class); Logger logger = (Logger) LoggerFactory.getLogger(ExternalCodeGenieSolver.class);
private void loadParameters() { private void loadParameters() {
logger.info("Loading parameters"); logger.info("Loading parameters");

View File

@@ -46,6 +46,10 @@ public class SummarizeDocSolver extends StepSolver {
private String qai_custom_memory_id; private String qai_custom_memory_id;
private Integer max_output_token; private Integer max_output_token;
private String qai_system_prompt_template_minimum; private String qai_system_prompt_template_minimum;
private String qai_system_prompt_template_formatter;
private boolean isChunked = false;
private Double chunk_size_token_calc;
private Double perc = 0.2;
// private boolean qai_load_graph_schema=false; // private boolean qai_load_graph_schema=false;
Logger logger = (Logger) LoggerFactory.getLogger(SummarizeDocSolver.class); Logger logger = (Logger) LoggerFactory.getLogger(SummarizeDocSolver.class);
@@ -75,6 +79,8 @@ public class SummarizeDocSolver extends StepSolver {
this.qai_system_prompt_template_minimum = attributeParser this.qai_system_prompt_template_minimum = attributeParser
.parse((String) this.step.getAttributes().get("qai_system_prompt_template_minimum")); .parse((String) this.step.getAttributes().get("qai_system_prompt_template_minimum"));
this.qai_system_prompt_template_formatter = attributeParser
.parse((String) this.step.getAttributes().get("qai_system_prompt_template_formatter"));
} }
@Override @Override
@@ -99,13 +105,16 @@ public class SummarizeDocSolver extends StepSolver {
// Conta i token // Conta i token
// int tokenCount = encoding.get().encode(text).size(); // int tokenCount = encoding.get().encode(text).size();
tokenCount = encoding.get().countTokens(text); tokenCount = encoding.get().countTokens(text);
logger.info("token count input: " + tokenCount);
int charCount = text.length(); int charCount = text.length();
// Stima media caratteri per token // Stima media caratteri per token
// double charPerToken = (double) charCount / tokenCount; // double charPerToken = (double) charCount / tokenCount;
//Double output_char = (double) charCount * ((double) this.percent_summarize / 100.0); // Double output_charD = (double) charCount * ((double) this.percent_summarize / 100.0);
Double min_output_token = (double) tokenCount * ((double) this.percent_summarize / 100.0); // Integer output_char = output_charD.intValue();
Double min_output_tokenD = (double) tokenCount * ((double) this.percent_summarize / 100.0);
Integer min_output_token = min_output_tokenD.intValue();
String content = new String(""); String content = new String("");
content = this.qai_system_prompt_template.replace("max_number_token", content = this.qai_system_prompt_template.replace("max_number_token",
max_output_token.toString()); max_output_token.toString());
@@ -115,16 +124,32 @@ public class SummarizeDocSolver extends StepSolver {
"min_number_token", "min_number_token",
min_output_token.toString()); min_output_token.toString());
} }
chunk_size_token_calc = (Double)((double)tokenCount * perc);
if(chunk_size_token_calc>chunk_size_token){
chunk_size_token_calc = (double)chunk_size_token;
}
// **Fase di Summarization** // **Fase di Summarization**
String summarizedText = summarize(text); // 🔹 Applica la funzione di riassunto String summarizedText = summarize(text); // 🔹 Applica la funzione di riassunto
// String template = this.qai_system_prompt_template+" The output length should // String template = this.qai_system_prompt_template+" The output length should
// be of " + output_char + " characters"; // be of " + output_char + " characters";
logger.info("template: " + content);
// Creazione dei messaggi per il modello AI // Creazione dei messaggi per il modello AI
Message userMessage = new UserMessage(summarizedText); Message userMessage = new UserMessage(summarizedText);
Message systemMessage = new SystemMessage(content); Message systemMessage = null;
logger.info("template: " + systemMessage.getContent().toString());
int tokenCountSummary = encoding.get().countTokens(summarizedText);
if(isChunked && tokenCountSummary < max_output_token){
systemMessage = new SystemMessage(this.qai_system_prompt_template_formatter);
logger.info("template formatter: " + this.qai_system_prompt_template_formatter);
}else{
//here
systemMessage = new SystemMessage(content);
logger.info("template: " + content);
}
CallResponseSpec resp = chatClient.prompt() CallResponseSpec resp = chatClient.prompt()
.messages(userMessage, systemMessage) .messages(userMessage, systemMessage)
.advisors(advisor -> advisor .advisors(advisor -> advisor
@@ -144,6 +169,9 @@ public class SummarizeDocSolver extends StepSolver {
logger.info("Token usage information is not available."); logger.info("Token usage information is not available.");
} }
tokenCount = encoding.get().countTokens(output);
logger.info("token count output: " + tokenCount);
// Salvataggio dell'output nel contesto di esecuzione // Salvataggio dell'output nel contesto di esecuzione
this.scenarioExecution.getExecSharedMap().put(this.qai_output_variable, output); this.scenarioExecution.getExecSharedMap().put(this.qai_output_variable, output);
this.scenarioExecution.setNextStepId(this.step.getNextStepId()); this.scenarioExecution.setNextStepId(this.step.getNextStepId());
@@ -160,11 +188,13 @@ public class SummarizeDocSolver extends StepSolver {
// Se il testo è già corto, non riassumere // Se il testo è già corto, non riassumere
logger.info("length: " + text.length()); logger.info("length: " + text.length());
Double chunk_size_text; Double chunk_size_text;
tokenCount = encoding.get().countTokens(text);
int textLengthPlus = (int) (text.length() * 1.1); int textLengthPlus = (int) (text.length() * 1.1);
int tokenCountPlus = (int) (tokenCount * 1.1); int tokenCountPlus = (int) (tokenCount * 1.1);
// chunk_size_token/(ratio+10%)
// Double ratio = Math.floor((textLengthPlus / charMax) + 1); Double ratio = Math.floor((tokenCountPlus / chunk_size_token_calc) + 1);
Double ratio = Math.floor((tokenCountPlus / chunk_size_token) + 1); //Double ratio = Math.floor((tokenCountPlus / chunk_size_token) + 1);
if (ratio == 1) { if (ratio == 1) {
return text; return text;
} else { } else {
@@ -174,8 +204,9 @@ public class SummarizeDocSolver extends StepSolver {
// Suddividere il testo in chunk // Suddividere il testo in chunk
List<String> chunks = chunkText(text, chunk_size_text.intValue()); List<String> chunks = chunkText(text, chunk_size_text.intValue());
List<String> summarizedChunks = new ArrayList<>(); List<String> summarizedChunks = new ArrayList<>();
Double maxTokenChunkD = Math.ceil(chunk_size_token / ratio); //Double maxTokenChunkD = Math.ceil(chunk_size_token / ratio);
Double maxTokenChunkD = Math.ceil(chunk_size_token_calc / ratio);
maxTokenChunk = maxTokenChunkD.intValue(); maxTokenChunk = maxTokenChunkD.intValue();
// Riassumere ogni chunk singolarmente // Riassumere ogni chunk singolarmente
@@ -184,12 +215,13 @@ public class SummarizeDocSolver extends StepSolver {
summarizedChunks.add(summarizeChunk(chunk)); summarizedChunks.add(summarizeChunk(chunk));
} }
isChunked = true;
// Unire i riassunti // Unire i riassunti
String summarizedText = String.join(" ", summarizedChunks); String summarizedText = String.join(" ", summarizedChunks);
int tokenCountSummarizedText = encoding.get().countTokens(summarizedText); int tokenCountSummarizedText = encoding.get().countTokens(summarizedText);
// Se il riassunto è ancora troppo lungo, applicare ricorsione // Se il riassunto è ancora troppo lungo, applicare ricorsione
if (tokenCountSummarizedText > chunk_size_token) { if (tokenCountSummarizedText > max_output_token) {
return summarize(summarizedText); return summarize(summarizedText);
} else { } else {
return summarizedText; return summarizedText;
@@ -210,13 +242,16 @@ public class SummarizeDocSolver extends StepSolver {
private String summarizeChunk(String chunk) { private String summarizeChunk(String chunk) {
String content = new String(""); String content = new String("");
if (maxTokenChunk < max_output_token) { /* if (maxTokenChunk < max_output_token) {
content = this.qai_system_prompt_template_chunk.replace("max_number_token", content = this.qai_system_prompt_template_chunk.replace("max_number_token",
maxTokenChunk.toString()); maxTokenChunk.toString());
}else{ }else{
content = this.qai_system_prompt_template_chunk.replace("max_number_token", content = this.qai_system_prompt_template_chunk.replace("max_number_token",
max_output_token.toString()); max_output_token.toString());
} }*/
content = this.qai_system_prompt_template_chunk.replace("max_number_token",
maxTokenChunk.toString());
Message chunkMessage = new UserMessage(chunk); Message chunkMessage = new UserMessage(chunk);
Message systemMessage = new SystemMessage( Message systemMessage = new SystemMessage(