From 96035f864a2f2648311455a63cd9dd102162c2da Mon Sep 17 00:00:00 2001 From: teddy Date: Thu, 12 Feb 2026 11:42:38 +0900 Subject: [PATCH] =?UTF-8?q?containerName=20=EC=83=9D=EC=84=B1=20=EB=B3=80?= =?UTF-8?q?=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../postgres/core/ModelTrainJobCoreService.java | 7 ++++++- .../postgres/entity/ModelTrainJobEntity.java | 10 +++++++++- .../cd/training/train/dto/ModelTrainJobDto.java | 2 ++ .../training/train/service/DockerTrainService.java | 2 +- .../cd/training/train/service/TrainJobService.java | 1 + .../cd/training/train/service/TrainJobWorker.java | 12 ++++++++++-- 6 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/kamco/cd/training/postgres/core/ModelTrainJobCoreService.java b/src/main/java/com/kamco/cd/training/postgres/core/ModelTrainJobCoreService.java index 350a248..4a2ce5d 100644 --- a/src/main/java/com/kamco/cd/training/postgres/core/ModelTrainJobCoreService.java +++ b/src/main/java/com/kamco/cd/training/postgres/core/ModelTrainJobCoreService.java @@ -47,7 +47,8 @@ public class ModelTrainJobCoreService { /** 실행 시작 처리 */ @Transactional - public void markRunning(Long jobId, String containerName, String logPath, String lockedBy) { + public void markRunning( + Long jobId, String containerName, String logPath, String lockedBy, Integer totalEpoch) { ModelTrainJobEntity job = modelTrainJobRepository .findById(jobId) @@ -59,6 +60,10 @@ public class ModelTrainJobCoreService { job.setStartedDttm(ZonedDateTime.now()); job.setLockedDttm(ZonedDateTime.now()); job.setLockedBy(lockedBy); + + if (totalEpoch != null) { + job.setTotalEpoch(totalEpoch); + } } /** 성공 처리 */ diff --git a/src/main/java/com/kamco/cd/training/postgres/entity/ModelTrainJobEntity.java b/src/main/java/com/kamco/cd/training/postgres/entity/ModelTrainJobEntity.java index 23c11e0..4be89a8 100644 --- a/src/main/java/com/kamco/cd/training/postgres/entity/ModelTrainJobEntity.java +++ b/src/main/java/com/kamco/cd/training/postgres/entity/ModelTrainJobEntity.java @@ -78,6 +78,12 @@ public class ModelTrainJobEntity { @Column(name = "locked_by", length = 100) private String lockedBy; + @Column(name = "total_epoch") + private Integer totalEpoch; + + @Column(name = "current_epoch") + private Integer currentEpoch; + public ModelTrainJobDto toDto() { return new ModelTrainJobDto( this.id, @@ -90,6 +96,8 @@ public class ModelTrainJobEntity { this.paramsJson, this.queuedDttm, this.startedDttm, - this.finishedDttm); + this.finishedDttm, + this.totalEpoch, + this.currentEpoch); } } diff --git a/src/main/java/com/kamco/cd/training/train/dto/ModelTrainJobDto.java b/src/main/java/com/kamco/cd/training/train/dto/ModelTrainJobDto.java index f9d0004..9545ec4 100644 --- a/src/main/java/com/kamco/cd/training/train/dto/ModelTrainJobDto.java +++ b/src/main/java/com/kamco/cd/training/train/dto/ModelTrainJobDto.java @@ -20,4 +20,6 @@ public class ModelTrainJobDto { private ZonedDateTime queuedDttm; private ZonedDateTime startedDttm; private ZonedDateTime finishedDttm; + private Integer totalEpoch; + private Integer currentEpoch; } diff --git a/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java b/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java index 4c3dea4..b27e5a2 100644 --- a/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java +++ b/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java @@ -87,7 +87,7 @@ public class DockerTrainService { log.info("[EPOCH] container={} {}/{}", containerName, currentEpoch, totalEpoch); // TODO 실행중인 에폭 저장 필요하면 만들어야함 - // TODO 완료여부를 여기다가? + // TODO 하지만 여기서 트랜젝션 걸리는 db 작업하면 안좋다고하는데..? // modelTrainMngCoreService.updateCurrentEpoch(modelId, // currentEpoch, totalEpoch); } diff --git a/src/main/java/com/kamco/cd/training/train/service/TrainJobService.java b/src/main/java/com/kamco/cd/training/train/service/TrainJobService.java index 189f0f9..ab53f79 100644 --- a/src/main/java/com/kamco/cd/training/train/service/TrainJobService.java +++ b/src/main/java/com/kamco/cd/training/train/service/TrainJobService.java @@ -58,6 +58,7 @@ public class TrainJobService { Map paramsMap = objectMapper.convertValue(trainRunRequest, Map.class); paramsMap.put("jobType", "TRAIN"); paramsMap.put("uuid", trainRunRequest.getUuid()); + paramsMap.put("totalEpoch", trainRunRequest.getEpochs()); Long jobId = modelTrainJobCoreService.createQueuedJob( diff --git a/src/main/java/com/kamco/cd/training/train/service/TrainJobWorker.java b/src/main/java/com/kamco/cd/training/train/service/TrainJobWorker.java index 00f8cd5..afa2268 100644 --- a/src/main/java/com/kamco/cd/training/train/service/TrainJobWorker.java +++ b/src/main/java/com/kamco/cd/training/train/service/TrainJobWorker.java @@ -47,9 +47,17 @@ public class TrainJobWorker { boolean isEval = "EVAL".equals(jobType); - String containerName = (isEval ? "eval-" : "train-") + jobId + "-" + params.get("uuid"); + String containerName = + (isEval ? "eval-" : "train-") + jobId + "-" + params.get("uuid").toString().substring(0, 8); - modelTrainJobCoreService.markRunning(jobId, containerName, null, "TRAIN_WORKER"); + Integer totalEpoch = null; + if (params.containsKey("totalEpoch")) { + if (params.get("totalEpoch") != null) { + totalEpoch = Integer.parseInt(params.get("totalEpoch").toString()); + } + } + + modelTrainJobCoreService.markRunning(jobId, containerName, null, "TRAIN_WORKER", totalEpoch); try { TrainRunResult result;