Merge pull request '파라미터 변경' (#77) from feat/training_260202 into develop
Reviewed-on: #77
This commit was merged in pull request #77.
This commit is contained in:
@@ -13,4 +13,10 @@ public class EvalRunRequest {
|
|||||||
private String uuid;
|
private String uuid;
|
||||||
private int epoch; // best_changed_fscore_epoch_1.pth
|
private int epoch; // best_changed_fscore_epoch_1.pth
|
||||||
private Integer timeoutSeconds;
|
private Integer timeoutSeconds;
|
||||||
|
private String datasetFolder;
|
||||||
|
private String outputFolder;
|
||||||
|
|
||||||
|
public String getOutputFolder() {
|
||||||
|
return this.outputFolder.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -311,6 +311,7 @@ public class DockerTrainService {
|
|||||||
addArg(c, "--hue-delta", req.getHueDelta());
|
addArg(c, "--hue-delta", req.getHueDelta());
|
||||||
|
|
||||||
addArg(c, "--resume-from", req.getResumeFrom());
|
addArg(c, "--resume-from", req.getResumeFrom());
|
||||||
|
addArg(c, "--save-interval", 1);
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -414,30 +415,28 @@ public class DockerTrainService {
|
|||||||
|
|
||||||
c.add("docker");
|
c.add("docker");
|
||||||
c.add("run");
|
c.add("run");
|
||||||
c.add("--name");
|
|
||||||
c.add(containerName);
|
|
||||||
c.add("--rm");
|
c.add("--rm");
|
||||||
|
|
||||||
c.add("--gpus");
|
c.add("--gpus");
|
||||||
c.add("all");
|
c.add("all");
|
||||||
if (ipcHost) c.add("--ipc=host");
|
c.add("--ipc=host");
|
||||||
c.add("--shm-size=" + shmSize);
|
c.add("--shm-size=" + shmSize);
|
||||||
|
|
||||||
c.add("-v");
|
c.add("-v");
|
||||||
c.add("/home/kcomu/data" + "/tmp:/data");
|
c.add("/home/kcomu/data" + "/tmp:/data");
|
||||||
|
|
||||||
c.add("-v");
|
c.add("-v");
|
||||||
c.add(responseDir + ":/checkpoints");
|
c.add(responseDir + ":/checkpoints");
|
||||||
|
|
||||||
c.add(image);
|
c.add("kamco-cd-train:latest");
|
||||||
|
|
||||||
c.add("python");
|
c.add("python");
|
||||||
c.add("/workspace/change-detection-code/run_evaluation_pipeline.py");
|
c.add("/workspace/change-detection-code/run_evaluation_pipeline.py");
|
||||||
|
|
||||||
c.add("--dataset_dir");
|
addArg(c, "--dataset-folder", req.getDatasetFolder());
|
||||||
c.add("/data/" + uuid);
|
addArg(c, "--output-folder", req.getOutputFolder());
|
||||||
|
|
||||||
c.add("--model");
|
c.add("--epoch");
|
||||||
c.add("/checkpoints/" + uuid + "/" + modelFile);
|
c.add(modelFile);
|
||||||
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import com.kamco.cd.training.model.dto.ModelTrainMngDto;
|
|||||||
import com.kamco.cd.training.postgres.core.ModelTrainJobCoreService;
|
import com.kamco.cd.training.postgres.core.ModelTrainJobCoreService;
|
||||||
import com.kamco.cd.training.postgres.core.ModelTrainMngCoreService;
|
import com.kamco.cd.training.postgres.core.ModelTrainMngCoreService;
|
||||||
import com.kamco.cd.training.train.dto.ModelTrainJobQueuedEvent;
|
import com.kamco.cd.training.train.dto.ModelTrainJobQueuedEvent;
|
||||||
|
import com.kamco.cd.training.train.dto.TrainRunRequest;
|
||||||
import java.time.ZonedDateTime;
|
import java.time.ZonedDateTime;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
@@ -32,10 +33,15 @@ public class TestJobService {
|
|||||||
// best epoch 업데이트
|
// best epoch 업데이트
|
||||||
modelTrainMngCoreService.updateModelMasterBestEpoch(modelId, epoch);
|
modelTrainMngCoreService.updateModelMasterBestEpoch(modelId, epoch);
|
||||||
|
|
||||||
|
// 파라미터 조회
|
||||||
|
TrainRunRequest trainRunRequest = modelTrainMngCoreService.findTrainRunRequest(modelId);
|
||||||
|
|
||||||
Map<String, Object> params = new java.util.LinkedHashMap<>();
|
Map<String, Object> params = new java.util.LinkedHashMap<>();
|
||||||
params.put("jobType", "EVAL");
|
params.put("jobType", "EVAL");
|
||||||
params.put("uuid", String.valueOf(uuid));
|
params.put("uuid", String.valueOf(uuid));
|
||||||
params.put("epoch", epoch);
|
params.put("epoch", epoch);
|
||||||
|
params.put("datasetFolder", trainRunRequest.getDatasetFolder());
|
||||||
|
params.put("outputFolder", trainRunRequest.getOutputFolder());
|
||||||
|
|
||||||
int nextAttemptNo = modelTrainJobCoreService.findMaxAttemptNo(modelId) + 1;
|
int nextAttemptNo = modelTrainJobCoreService.findMaxAttemptNo(modelId) + 1;
|
||||||
|
|
||||||
|
|||||||
@@ -68,8 +68,16 @@ public class TrainJobWorker {
|
|||||||
modelTrainMngCoreService.markStep2InProgress(modelId, jobId);
|
modelTrainMngCoreService.markStep2InProgress(modelId, jobId);
|
||||||
String uuid = String.valueOf(params.get("uuid"));
|
String uuid = String.valueOf(params.get("uuid"));
|
||||||
int epoch = (int) params.get("epoch");
|
int epoch = (int) params.get("epoch");
|
||||||
|
String datasetFolder = String.valueOf(params.get("datasetFolder"));
|
||||||
|
String outputFolder = String.valueOf(params.get("outputFolder"));
|
||||||
|
|
||||||
|
EvalRunRequest evalReq = new EvalRunRequest();
|
||||||
|
evalReq.setUuid(uuid);
|
||||||
|
evalReq.setEpoch(epoch);
|
||||||
|
evalReq.setTimeoutSeconds(null);
|
||||||
|
evalReq.setDatasetFolder(datasetFolder);
|
||||||
|
evalReq.setOutputFolder(outputFolder);
|
||||||
|
|
||||||
EvalRunRequest evalReq = new EvalRunRequest(uuid, epoch, null);
|
|
||||||
result = dockerTrainService.runEvalSync(evalReq, containerName);
|
result = dockerTrainService.runEvalSync(evalReq, containerName);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user