파라미터 변경 #77

Merged
teddy merged 1 commit from feat/training_260202 into develop 2026-02-12 21:30:18 +09:00
4 changed files with 29 additions and 10 deletions

View File

@@ -13,4 +13,10 @@ public class EvalRunRequest {
// Identifier of the run to evaluate. NOTE(review): presumably the training run's UUID — confirm against callers.
private String uuid;
// Epoch number selecting the checkpoint to evaluate.
private int epoch; // e.g. best_changed_fscore_epoch_1.pth (NOTE(review): looks like the checkpoint file naming pattern — confirm)
// Boxed Integer, so the value may be left null. NOTE(review): presumably a timeout in seconds; null handling is not visible here — confirm.
private Integer timeoutSeconds;
// Dataset folder for the evaluation. NOTE(review): appears to be forwarded as the --dataset-folder argument — confirm.
private String datasetFolder;
// Output folder for the evaluation. NOTE(review): appears to be forwarded as the --output-folder argument — confirm.
private String outputFolder;
/**
 * Returns the output folder configured for this evaluation request.
 *
 * @return the output folder, or {@code null} when none has been set
 */
public String getOutputFolder() {
    // The field is already a String, so calling toString() on it added nothing and threw
    // NullPointerException whenever the folder was unset. Return the field as-is and let
    // the caller decide how to treat a missing value.
    return this.outputFolder;
}
}

View File

@@ -311,6 +311,7 @@ public class DockerTrainService {
addArg(c, "--hue-delta", req.getHueDelta());
addArg(c, "--resume-from", req.getResumeFrom());
addArg(c, "--save-interval", 1);
return c;
}
@@ -414,30 +415,28 @@ public class DockerTrainService {
c.add("docker");
c.add("run");
c.add("--name");
c.add(containerName);
c.add("--rm");
c.add("--gpus");
c.add("all");
if (ipcHost) c.add("--ipc=host");
c.add("--ipc=host");
c.add("--shm-size=" + shmSize);
c.add("-v");
c.add("/home/kcomu/data" + "/tmp:/data");
c.add("-v");
c.add(responseDir + ":/checkpoints");
c.add(image);
c.add("kamco-cd-train:latest");
c.add("python");
c.add("/workspace/change-detection-code/run_evaluation_pipeline.py");
c.add("--dataset_dir");
c.add("/data/" + uuid);
addArg(c, "--dataset-folder", req.getDatasetFolder());
addArg(c, "--output-folder", req.getOutputFolder());
c.add("--model");
c.add("/checkpoints/" + uuid + "/" + modelFile);
c.add("--epoch");
c.add(modelFile);
return c;
}

View File

@@ -5,6 +5,7 @@ import com.kamco.cd.training.model.dto.ModelTrainMngDto;
import com.kamco.cd.training.postgres.core.ModelTrainJobCoreService;
import com.kamco.cd.training.postgres.core.ModelTrainMngCoreService;
import com.kamco.cd.training.train.dto.ModelTrainJobQueuedEvent;
import com.kamco.cd.training.train.dto.TrainRunRequest;
import java.time.ZonedDateTime;
import java.util.Map;
import java.util.UUID;
@@ -32,10 +33,15 @@ public class TestJobService {
// best epoch 업데이트
modelTrainMngCoreService.updateModelMasterBestEpoch(modelId, epoch);
// 파라미터 조회
TrainRunRequest trainRunRequest = modelTrainMngCoreService.findTrainRunRequest(modelId);
Map<String, Object> params = new java.util.LinkedHashMap<>();
params.put("jobType", "EVAL");
params.put("uuid", String.valueOf(uuid));
params.put("epoch", epoch);
params.put("datasetFolder", trainRunRequest.getDatasetFolder());
params.put("outputFolder", trainRunRequest.getOutputFolder());
int nextAttemptNo = modelTrainJobCoreService.findMaxAttemptNo(modelId) + 1;

View File

@@ -68,8 +68,16 @@ public class TrainJobWorker {
modelTrainMngCoreService.markStep2InProgress(modelId, jobId);
String uuid = String.valueOf(params.get("uuid"));
int epoch = (int) params.get("epoch");
String datasetFolder = String.valueOf(params.get("datasetFolder"));
String outputFolder = String.valueOf(params.get("outputFolder"));
EvalRunRequest evalReq = new EvalRunRequest();
evalReq.setUuid(uuid);
evalReq.setEpoch(epoch);
evalReq.setTimeoutSeconds(null);
evalReq.setDatasetFolder(datasetFolder);
evalReq.setOutputFolder(outputFolder);
EvalRunRequest evalReq = new EvalRunRequest(uuid, epoch, null);
result = dockerTrainService.runEvalSync(evalReq, containerName);
} else {