Merged PR 196: Update downloadFile and EmbeddingDoc step

This commit is contained in:
2025-07-10 15:27:53 +00:00
5 changed files with 148 additions and 127 deletions

View File

@@ -7,23 +7,18 @@ import java.util.List;
import java.util.Random;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpStatus;
import org.springframework.core.io.Resource;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import com.netflix.discovery.converters.Auto;
import com.olympus.dto.FileUploadDTO;
import com.olympus.hermione.dto.FileDeleteRequest;
import com.olympus.hermione.services.FileService;
import com.olympus.model.apollo.KSDocument;
import com.olympus.model.apollo.KSIngestionInfo;
@RestController
public class FileController {
@@ -44,4 +39,12 @@ public class FileController {
/**
 * Deletes a file, delegating entirely to {@link FileService#deleteFile}.
 *
 * @param request request body identifying the file to delete
 * @return the service's response entity carrying a status message
 */
public ResponseEntity<String> deleteFile(@RequestBody FileDeleteRequest request) {
return fileService.deleteFile(request);
}
/**
 * Serves a previously uploaded file to the caller.
 *
 * Thin HTTP adapter: all path handling, security checks and header
 * construction live in {@link FileService#downloadFile}.
 *
 * @param filePath    request parameter "filePath": relative path of the file
 * @param executionId request parameter "executionId": execution id used for log correlation
 * @return the response entity produced by the service (file body or error status)
 */
@GetMapping("/downloadFile")
public ResponseEntity<Resource> downloadFile(@RequestParam("filePath") String filePath,
        @RequestParam("executionId") String executionId) {
    return fileService.downloadFile(filePath, executionId);
}
}

View File

@@ -1,46 +0,0 @@
package com.olympus.hermione.controllers.dashboard;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import com.olympus.hermione.repository.dashboard.DashboardChatModel;
import com.olympus.hermione.repository.dashboard.DashboardScenarioChatRepository;
/**
 * Placeholder controller for dashboard scenario-chat endpoints.
 *
 * NOTE(review): the previous revision kept ~30 lines of commented-out
 * endpoint code (find-all, by-user, by-conversation, by-scenario, save),
 * all against {@code DashboardScenarioChatRepository}. Commented-out code
 * belongs in version-control history, not in the file, so it has been
 * removed; recover it from VCS if those endpoints are ever revived.
 */
@RestController
public class DashboardChatController {
}

View File

@@ -2,28 +2,24 @@ package com.olympus.hermione.services;
import org.springframework.stereotype.Service;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Random;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import com.olympus.dto.FileUploadDTO;
import com.olympus.hermione.dto.FileDeleteRequest;
import com.olympus.model.apollo.KSDocument;
import com.olympus.model.apollo.KSIngestionInfo;
@Service
public class FileService {
@@ -147,4 +143,78 @@ public class FileService {
return null;
}
/**
* Scarica un file dal path specificato
*
* @param filePath Il path relativo del file da scaricare
* @param executionId L'ID dell'esecuzione per il logging
* @return ResponseEntity con il file come Resource
*/
/**
 * Downloads a file located under the configured upload directory.
 *
 * Security: the requested path is resolved and normalized, and the
 * containment check against the upload directory runs BEFORE any
 * filesystem probe. The original order (exists/readable first) both
 * touched the filesystem for hostile paths and leaked whether files
 * outside the sandbox exist (404 vs 403).
 *
 * @param filePath    relative path of the file to download; duplicated or
 *                    mixed separators are collapsed to "/"
 * @param executionId execution identifier, used only for log correlation
 * @return 200 with the file as a {@link Resource}; 403 when the path
 *         escapes the upload directory or the file is unreadable; 404 when
 *         the file does not exist; 500 on I/O or unexpected failure
 */
public ResponseEntity<Resource> downloadFile(String filePath, String executionId) {
try {
logger.info("Downloading file: {} for execution: {}", filePath, executionId);
// Collapse runs of "\" and "/" into a single "/" (e.g. "a\\b//c" -> "a/b/c")
String normalizedPath = filePath.replaceAll("[\\\\/]+", "/");
// Resolve against the upload dir and normalize away any ".." segments
Path uploadPath = Paths.get(uploadDir).toAbsolutePath().normalize();
Path file = uploadPath.resolve(normalizedPath).normalize();
logger.info("Upload directory: {}", uploadPath);
logger.info("Normalized relative path: {}", normalizedPath);
logger.info("Full file path: {}", file);
// Containment check first: reject traversal attempts before probing the filesystem
if (!file.startsWith(uploadPath)) {
logger.error("Security violation: attempting to access file outside upload directory: {}", file);
return ResponseEntity.status(HttpStatus.FORBIDDEN).build();
}
if (!Files.exists(file)) {
logger.error("File not found: {}", file);
return ResponseEntity.notFound().build();
}
if (!Files.isReadable(file)) {
logger.error("File not readable: {}", file);
return ResponseEntity.status(HttpStatus.FORBIDDEN).build();
}
Resource resource = new FileSystemResource(file);
// Fall back to a generic binary type when probing cannot determine one
String contentType = Files.probeContentType(file);
if (contentType == null) {
contentType = "application/octet-stream";
}
String fileName = file.getFileName().toString();
// Compute the length once; contentLength() may hit the filesystem each call
long length = resource.contentLength();
HttpHeaders headers = new HttpHeaders();
headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + fileName + "\"");
logger.info("File downloaded successfully: {} (size: {} bytes)", fileName, length);
// Content-Type and Content-Length are set exactly once via the builder
// (the original set Content-Type both as a raw header and via contentType())
return ResponseEntity.ok()
.headers(headers)
.contentLength(length)
.contentType(MediaType.parseMediaType(contentType))
.body(resource);
} catch (IOException e) {
logger.error("Error downloading file: {} - {}", filePath, e.getMessage(), e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build();
} catch (Exception e) {
logger.error("Unexpected error downloading file: {} - {}", filePath, e.getMessage(), e);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build();
}
}
}

View File

@@ -331,16 +331,9 @@ public class ScenarioExecutionService {
scenarioExecution.setScenario(scenario);
// prendi i file dalla cartella temporanea se è presente una chiave con name
// "MultiFileUpload"
if (scenarioExecutionInput.getInputs().containsKey("MultiFileUpload")) {
folder_name = scenarioExecutionInput.getInputs().get("MultiFileUpload");
if (folder_name != null && !folder_name.equals("")) {
try {
String base64 = folderToBase64(folder_name);
scenarioExecutionInput.getInputs().put("MultiFileUpload", base64);
} catch (Exception e) {
logger.error("Error while converting folder to base64: " + e.getMessage());
}
}
}
if (scenarioExecutionInput.getInputs().containsKey("SingleFileUpload")) {
scenarioExecutionInput.getInputs().put("SingleFileUpload",

View File

@@ -4,9 +4,11 @@ import ch.qos.logback.classic.Logger;
import com.olympus.hermione.models.ScenarioExecution;
import com.olympus.hermione.utility.AttributeParser;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
@@ -20,11 +22,9 @@ public class EmbeddingDocTempSolver extends StepSolver {
private int min_chunk_length_to_embed;
private int max_num_chunks;
Logger logger = (Logger) LoggerFactory.getLogger(EmbeddingDocTempSolver.class);
private void loadParameters(){
private void loadParameters() {
logger.info("Loading parameters");
this.scenarioExecution.getExecSharedMap().put("scenario_execution_id", this.scenarioExecution.getId());
logger.info("Scenario Execution ID: {}", this.scenarioExecution.getId());
@@ -36,77 +36,83 @@ public class EmbeddingDocTempSolver extends StepSolver {
this.path_file = attributeParser.parse((String) this.step.getAttributes().get("path_file"));
logger.info("Parsed path_file: {}", this.path_file);
if(this.step.getAttributes().containsKey("default_chunk_size")){
if (this.step.getAttributes().containsKey("default_chunk_size")) {
this.default_chunk_size = (int) this.step.getAttributes().get("default_chunk_size");
logger.info("Parsed default_chunk_size from attributes: {}", this.default_chunk_size);
}else{
} else {
this.default_chunk_size = 8000;
logger.info("default_chunk_size not found in attributes, using default: 8000");
}
if(this.step.getAttributes().containsKey("min_chunk_size")){
if (this.step.getAttributes().containsKey("min_chunk_size")) {
this.min_chunk_size = (int) this.step.getAttributes().get("min_chunk_size");
}else{
} else {
this.min_chunk_size = 50;
logger.info("min_chunk_size not found in attributes, using default: 50");
}
if(this.step.getAttributes().containsKey("min_chunk_length_to_embed")){
if (this.step.getAttributes().containsKey("min_chunk_length_to_embed")) {
this.min_chunk_length_to_embed = (int) this.step.getAttributes().get("min_chunk_length_to_embed");
}else{
} else {
this.min_chunk_length_to_embed = 50;
logger.info("min_chunk_length_to_embed not found in attributes, using default: 50");
}
if(this.step.getAttributes().containsKey("max_num_chunks")){
if (this.step.getAttributes().containsKey("max_num_chunks")) {
this.max_num_chunks = (int) this.step.getAttributes().get("max_num_chunks");
}else{
} else {
this.max_num_chunks = 1000;
logger.info("max_num_chunks not found in attributes, using default: 1000");
}
}
@Override
public ScenarioExecution solveStep(){
public ScenarioExecution solveStep() {
try{
logger.info("Solving step: " + this.step.getName());
this.scenarioExecution.setCurrentStepId(this.step.getStepId());
logger.info("Loading parameters for step: {}", this.step.getName());
loadParameters();
logger.info("Embedding documents");
File file = new File(this.path_file);
logger.info("Reading file from path: {}", this.path_file);
Tika tika = new Tika();
tika.setMaxStringLength(-1);
String text = tika.parseToString(file);
logger.info("File read successfully. Length: {} characters", text.length());
Document myDoc = new Document(text);
logger.info("Solving step: " + this.step.getName());
this.scenarioExecution.setCurrentStepId(this.step.getStepId());
logger.info("Loading parameters for step: {}", this.step.getName());
loadParameters();
logger.info("Embedding documents");
File file = new File(this.path_file);
logger.info("Reading file from path: {}", this.path_file);
Tika tika = new Tika();
tika.setMaxStringLength(-1);
String text;
try {
text = tika.parseToString(file);
} catch (IOException | TikaException e) {
logger.error("Error parsing file: ", e);
throw new RuntimeException("Error parsing file", e);
}
logger.info("File read successfully. Length: {} characters", text.length());
Document myDoc = new Document(text);
List<Document> docs = Collections.singletonList(myDoc);
List<Document> docs = Collections.singletonList(myDoc);
logger.info("Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}",
logger.info(
"Initializing TokenTextSplitter with default_chunk_size={}, min_chunk_size={}, min_chunk_length_to_embed={}, max_num_chunks={}",
this.default_chunk_size, this.min_chunk_size, this.min_chunk_length_to_embed, this.max_num_chunks);
TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size,
this.min_chunk_size,
this.min_chunk_length_to_embed,
this.max_num_chunks,
true);
TokenTextSplitter splitter = new TokenTextSplitter(this.default_chunk_size,
this.min_chunk_size,
this.min_chunk_length_to_embed,
this.max_num_chunks,
true);
logger.info("Splitting and embedding documents");
docs.forEach(doc -> {
List<Document> splitDocs = splitter.split(doc);
Integer docIndex = 0;
logger.info("Number of split documents: {}", splitDocs.size());
logger.info("Splitting and embedding documents");
docs.forEach(doc -> {
List<Document> splitDocs = splitter.split(doc);
Integer docIndex = 0;
logger.info("Number of split documents: {}", splitDocs.size());
for (Document splitDoc : splitDocs) {
splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id);
splitDoc.getMetadata().put("KsDocumentIndex",docIndex.toString());
splitDoc.getMetadata().put("KsDoctype", "temp");
logger.info("Adding split document with index {} to vector store", docIndex);
docIndex++;
}
logger.info("Adding {} split documents to vector store", splitDocs.size());
for (Document splitDoc : splitDocs) {
splitDoc.getMetadata().put("KsDocumentId", this.scenario_execution_id);
splitDoc.getMetadata().put("KsDocumentIndex", docIndex.toString());
splitDoc.getMetadata().put("KsDoctype", "temp");
logger.info("Adding split document with index {} to vector store", docIndex);
docIndex++;
}
logger.info("Adding {} split documents to vector store", splitDocs.size());
// Carica un massimo di 10 documenti per volta
int batchSize = 10;
@@ -116,14 +122,9 @@ public class EmbeddingDocTempSolver extends StepSolver {
vectorStore.add(batch);
logger.info("Added batch of {} documents to vector store (from {} to {})", batch.size(), i, end - 1);
}
//vectorStore.add(splitDocs);
});
logger.info("All documents embedded and added to vector store successfully");
}catch (Exception e){
logger.error("Error while solvingStep: "+e.getMessage(), e);
e.printStackTrace();
}
// vectorStore.add(splitDocs);
});
logger.info("All documents embedded and added to vector store successfully");
logger.info("Setting next step id: {}", this.step.getNextStepId());
this.scenarioExecution.setNextStepId(this.step.getNextStepId());