From f00296cf2cc07704c31a5940e091144f763afc06 Mon Sep 17 00:00:00 2001 From: dean Date: Thu, 2 Apr 2026 21:17:01 +0900 Subject: [PATCH] welcome --- .../training/train/service/DockerTrainService.java | 13 +++++++++++-- src/main/resources/application-prod.yml | 4 ++++ src/main/resources/application.yml | 5 +++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java b/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java index 1f37115..177db1f 100644 --- a/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java +++ b/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java @@ -56,6 +56,13 @@ public class DockerTrainService { @Value("${spring.profiles.active}") private String profile; + @Value("${hyper.parameter.gpus}") + private String hyperGpus; + + + @Value("${hyper.parameter.gpu-ids}") + private String hyperGpuIds; + private final ModelTrainJobCoreService modelTrainJobCoreService; /** @@ -285,11 +292,13 @@ public class DockerTrainService { // addArg(c, "--gpu-ids", req.getGpuIds()); // null if ("prod".equals(profile)) { addArg(c, "--batch-size", 2); // 학습서버 GPU 1개인 곳은 batch-size:2 까지만 가능 - addArg(c, "--gpus", "1"); // 학습서버 GPU 1개인 곳은 1이어야 함 - addArg(c, "--gpu-ids", "0"); // 학습서버 GPU 1개인 곳은 0이어야 함 + } else { addArg(c, "--batch-size", req.getBatchSize()); // 학습서버 GPU 1개인 곳은 batch-size:2 까지만 가능 } + addArg(c, "--gpus", hyperGpus); // 학습서버 GPU 1개인 곳은 1이어야 함 + addArg(c, "--gpu-ids", hyperGpuIds); // 학습서버 GPU 1개인 곳은 0이어야 함 + addArg(c, "--lr", req.getLearningRate()); addArg(c, "--backbone", req.getBackbone()); addArg(c, "--epochs", req.getEpochs()); diff --git a/src/main/resources/application-prod.yml b/src/main/resources/application-prod.yml index efb3140..1d0e96a 100644 --- a/src/main/resources/application-prod.yml +++ b/src/main/resources/application-prod.yml @@ -41,3 +41,7 @@ train: container_prefix: kamco-cd-train shm_size: 16g ipc_host: true +hyper: + parameter: + gpus: 4 + gpu-ids: 0,1,2,3 diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 7271ba1..d13e1df 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -78,3 +78,8 @@ management: exposure: include: - "health" +hyper: + parameter: + gpus: 1 + gpu-ids: 0 +