Compare commits
10 Commits
d1593e57c3
...
91f022889b
| Author | SHA1 | Date | |
|---|---|---|---|
| 91f022889b | |||
| | f00296cf2c | ||
| f98f6cb038 | |||
| 732dccf2e4 | |||
| 618dbe4047 | |||
| b952ec7b47 | |||
| a5267d8065 | |||
| 39f39a4f0c | |||
| d99e18b38c | |||
| d6aa612494 |
@@ -181,15 +181,15 @@ public class ModelHyperParamEntity {
|
||||
private String metrics = "mFscore,mIoU";
|
||||
|
||||
/** Default: changed_fscore */
|
||||
@Size(max = 30)
|
||||
@Size(max = 100)
|
||||
@NotNull
|
||||
@Column(name = "save_best", nullable = false, length = 30)
|
||||
@Column(name = "save_best", nullable = false, length = 100)
|
||||
private String saveBest = "changed_fscore";
|
||||
|
||||
/** Default: greater */
|
||||
@Size(max = 10)
|
||||
@Size(max = 100)
|
||||
@NotNull
|
||||
@Column(name = "save_best_rule", nullable = false, length = 10)
|
||||
@Column(name = "save_best_rule", nullable = false, length = 100)
|
||||
private String saveBestRule = "greater";
|
||||
|
||||
/** Default: 1 */
|
||||
|
||||
@@ -56,6 +56,13 @@ public class DockerTrainService {
|
||||
@Value("${spring.profiles.active}")
|
||||
private String profile;
|
||||
|
||||
@Value("${hyper.parameter.gpus}")
|
||||
private String hyperGpus;
|
||||
|
||||
|
||||
@Value("${hyper.parameter.gpu-ids}")
|
||||
private String hyperGpuIds;
|
||||
|
||||
private final ModelTrainJobCoreService modelTrainJobCoreService;
|
||||
|
||||
/**
|
||||
@@ -285,11 +292,13 @@ public class DockerTrainService {
|
||||
// addArg(c, "--gpu-ids", req.getGpuIds()); // null
|
||||
if ("prod".equals(profile)) {
|
||||
addArg(c, "--batch-size", 2); // 학습서버 GPU 1개인 곳은 batch-size:2 까지만 가능
|
||||
addArg(c, "--gpus", "1"); // 학습서버 GPU 1개인 곳은 1이어야 함
|
||||
addArg(c, "--gpu-ids", "0"); // 학습서버 GPU 1개인 곳은 0이어야 함
|
||||
|
||||
} else {
|
||||
addArg(c, "--batch-size", req.getBatchSize()); // non-prod: pass the requested batch size through (the cap of 2 applies to training servers with a single GPU)
|
||||
}
|
||||
addArg(c, "--gpus", hyperGpus); // value of the hyper.parameter.gpus property; must be 1 on a training server with a single GPU
|
||||
addArg(c, "--gpu-ids", hyperGpuIds); // value of the hyper.parameter.gpu-ids property; must be 0 on a training server with a single GPU
|
||||
|
||||
addArg(c, "--lr", req.getLearningRate());
|
||||
addArg(c, "--backbone", req.getBackbone());
|
||||
addArg(c, "--epochs", req.getEpochs());
|
||||
|
||||
@@ -41,3 +41,7 @@ train:
|
||||
container_prefix: kamco-cd-train
|
||||
shm_size: 16g
|
||||
ipc_host: true
|
||||
hyper:
|
||||
parameter:
|
||||
gpus: 4
|
||||
gpu-ids: 0,1,2,3
|
||||
|
||||
@@ -78,3 +78,8 @@ management:
|
||||
exposure:
|
||||
include:
|
||||
- "health"
|
||||
hyper:
|
||||
parameter:
|
||||
gpus: 1
|
||||
gpu-ids: 0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user