diff --git a/src/main/java/com/kamco/cd/training/train/service/TrainJobWorker.java b/src/main/java/com/kamco/cd/training/train/service/TrainJobWorker.java index f4256d7..985dabe 100644 --- a/src/main/java/com/kamco/cd/training/train/service/TrainJobWorker.java +++ b/src/main/java/com/kamco/cd/training/train/service/TrainJobWorker.java @@ -108,6 +108,10 @@ public class TrainJobWorker { return; } + /** + * 0 정상 종료 SUCCESS 1~125 학습 코드 에러 FAILED 137 OOMKill FAILED 143 SIGTERM (stop) STOP -1 우리 내부 + * 강제 중단 STOP + */ if (result.getExitCode() == 0) { // 성공 처리 modelTrainJobCoreService.markSuccess(jobId, result.getExitCode()); @@ -124,18 +128,34 @@ public class TrainJobWorker { } } else { - String failMsg = result.getStatus() + "\n" + result.getLogs(); - log.info("training fail Msg ={}", failMsg); - // 실패 처리 - modelTrainJobCoreService.markPaused( - jobId, result.getExitCode(), result.getStatus() + "\n" + result.getLogs()); - if (isEval) { - // 오류 정보 등록 - modelTrainMngCoreService.markStep2Stop(modelId, "exit=" + result.getExitCode()); + String failMsg = result.getStatus() + "\n" + result.getLogs(); + log.info("training fail exitCode={} Msg ={}", result.getExitCode(), failMsg); + + if (result.getExitCode() == -1 || result.getExitCode() == 143) { + // 실패 처리 + modelTrainJobCoreService.markPaused( + jobId, result.getExitCode(), result.getStatus() + "\n" + result.getLogs()); + + if (isEval) { + // 오류 정보 등록 + modelTrainMngCoreService.markStep2Stop(modelId, "exit=" + result.getExitCode()); + } else { + // 오류 정보 등록 + modelTrainMngCoreService.markStep1Stop(modelId, "exit=" + result.getExitCode()); + } } else { - // 오류 정보 등록 - modelTrainMngCoreService.markStep1Stop(modelId, "exit=" + result.getExitCode()); + // 실패 처리 + modelTrainJobCoreService.markFailed( + jobId, result.getExitCode(), result.getStatus() + "\n" + result.getLogs()); + + if (isEval) { + // 오류 정보 등록 + modelTrainMngCoreService.markStep2Error(modelId, "exit=" + result.getExitCode()); + } else { + // 오류 정보 등록 + modelTrainMngCoreService.markError(modelId, "exit=" + result.getExitCode()); + } } }