diff --git a/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java b/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java index e99daf5..b0f0e2c 100644 --- a/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java +++ b/src/main/java/com/kamco/cd/training/train/service/DockerTrainService.java @@ -62,10 +62,36 @@ public class DockerTrainService { ProcessBuilder pb = new ProcessBuilder(cmd); pb.redirectErrorStream(true); - Process p = pb.start(); - // 로그는 별도 스레드에서 읽기 (메인 스레드가 readLine에 안 걸리게) StringBuilder logBuilder = new StringBuilder(); + Process p = pb.start(); + + log.info("[TRAIN-BOOT] docker run started. container={}", containerName); + + try { + log.info("[TRAIN-BOOT] pid={}", p.pid()); // Java 9+ + } catch (Throwable ignore) { + } + + try { + // Wait at most 100 ms to see whether the process exited right away. NOTE(review): nothing visible in this hunk appends to logBuilder before it is read below, so earlyLogs may be empty - confirm where the log reader starts. + if (p.waitFor(100, TimeUnit.MILLISECONDS)) { + int exit = p.exitValue(); + String earlyLogs; + synchronized (logBuilder) { + earlyLogs = logBuilder.toString(); + } + log.error( + "[TRAIN-BOOT] docker run exited immediately. container={} exit={}", + containerName, + exit); + log.error("[TRAIN-BOOT] early logs:\n{}", earlyLogs); + } else { + log.info("[TRAIN-BOOT] docker run is still running. container={}", containerName); + } + } catch (Exception e) { + log.warn("[TRAIN-BOOT] early-exit check failed: {}", e.toString(), e); + } Pattern epochPattern = Pattern.compile("Epoch\\(train\\)\\s+\\[(\\d+)\\]\\[(\\d+)/(\\d+)\\]"); @@ -176,7 +202,7 @@ public class DockerTrainService { List c = new ArrayList<>(); - c.add("docker"); + c.add("/usr/bin/docker"); c.add("run"); // 컨테이너 이름 지정 @@ -386,7 +412,7 @@ public class DockerTrainService { List c = new ArrayList<>(); - c.add("docker"); + c.add("/usr/bin/docker"); c.add("run"); c.add("--name"); c.add(containerName);