파라미터 변경 #77
@@ -13,4 +13,10 @@ public class EvalRunRequest {
|
||||
private String uuid;
|
||||
private int epoch; // best_changed_fscore_epoch_1.pth
|
||||
private Integer timeoutSeconds;
|
||||
private String datasetFolder;
|
||||
private String outputFolder;
|
||||
|
||||
public String getOutputFolder() {
|
||||
return this.outputFolder.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -311,6 +311,7 @@ public class DockerTrainService {
|
||||
addArg(c, "--hue-delta", req.getHueDelta());
|
||||
|
||||
addArg(c, "--resume-from", req.getResumeFrom());
|
||||
addArg(c, "--save-interval", 1);
|
||||
return c;
|
||||
}
|
||||
|
||||
@@ -414,30 +415,28 @@ public class DockerTrainService {
|
||||
|
||||
c.add("docker");
|
||||
c.add("run");
|
||||
c.add("--name");
|
||||
c.add(containerName);
|
||||
c.add("--rm");
|
||||
|
||||
c.add("--gpus");
|
||||
c.add("all");
|
||||
if (ipcHost) c.add("--ipc=host");
|
||||
c.add("--ipc=host");
|
||||
c.add("--shm-size=" + shmSize);
|
||||
|
||||
c.add("-v");
|
||||
c.add("/home/kcomu/data" + "/tmp:/data");
|
||||
|
||||
c.add("-v");
|
||||
c.add(responseDir + ":/checkpoints");
|
||||
|
||||
c.add(image);
|
||||
c.add("kamco-cd-train:latest");
|
||||
|
||||
c.add("python");
|
||||
c.add("/workspace/change-detection-code/run_evaluation_pipeline.py");
|
||||
|
||||
c.add("--dataset_dir");
|
||||
c.add("/data/" + uuid);
|
||||
addArg(c, "--dataset-folder", req.getDatasetFolder());
|
||||
addArg(c, "--output-folder", req.getOutputFolder());
|
||||
|
||||
c.add("--model");
|
||||
c.add("/checkpoints/" + uuid + "/" + modelFile);
|
||||
c.add("--epoch");
|
||||
c.add(modelFile);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import com.kamco.cd.training.model.dto.ModelTrainMngDto;
|
||||
import com.kamco.cd.training.postgres.core.ModelTrainJobCoreService;
|
||||
import com.kamco.cd.training.postgres.core.ModelTrainMngCoreService;
|
||||
import com.kamco.cd.training.train.dto.ModelTrainJobQueuedEvent;
|
||||
import com.kamco.cd.training.train.dto.TrainRunRequest;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
@@ -32,10 +33,15 @@ public class TestJobService {
|
||||
// best epoch 업데이트
|
||||
modelTrainMngCoreService.updateModelMasterBestEpoch(modelId, epoch);
|
||||
|
||||
// 파라미터 조회
|
||||
TrainRunRequest trainRunRequest = modelTrainMngCoreService.findTrainRunRequest(modelId);
|
||||
|
||||
Map<String, Object> params = new java.util.LinkedHashMap<>();
|
||||
params.put("jobType", "EVAL");
|
||||
params.put("uuid", String.valueOf(uuid));
|
||||
params.put("epoch", epoch);
|
||||
params.put("datasetFolder", trainRunRequest.getDatasetFolder());
|
||||
params.put("outputFolder", trainRunRequest.getOutputFolder());
|
||||
|
||||
int nextAttemptNo = modelTrainJobCoreService.findMaxAttemptNo(modelId) + 1;
|
||||
|
||||
|
||||
@@ -68,8 +68,16 @@ public class TrainJobWorker {
|
||||
modelTrainMngCoreService.markStep2InProgress(modelId, jobId);
|
||||
String uuid = String.valueOf(params.get("uuid"));
|
||||
int epoch = (int) params.get("epoch");
|
||||
String datasetFolder = String.valueOf(params.get("datasetFolder"));
|
||||
String outputFolder = String.valueOf(params.get("outputFolder"));
|
||||
|
||||
EvalRunRequest evalReq = new EvalRunRequest();
|
||||
evalReq.setUuid(uuid);
|
||||
evalReq.setEpoch(epoch);
|
||||
evalReq.setTimeoutSeconds(null);
|
||||
evalReq.setDatasetFolder(datasetFolder);
|
||||
evalReq.setOutputFolder(outputFolder);
|
||||
|
||||
EvalRunRequest evalReq = new EvalRunRequest(uuid, epoch, null);
|
||||
result = dockerTrainService.runEvalSync(evalReq, containerName);
|
||||
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user