From 12f6bb715413705c53ca9a867091313b8d6aac86 Mon Sep 17 00:00:00 2001 From: teddy Date: Fri, 27 Feb 2026 23:31:04 +0900 Subject: [PATCH] =?UTF-8?q?=ED=95=98=EB=93=9C=EB=A7=81=ED=81=AC=20?= =?UTF-8?q?=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../train/service/TmpDatasetService.java | 242 +++++------------- .../train/service/TrainJobService.java | 6 +- 2 files changed, 68 insertions(+), 180 deletions(-) diff --git a/src/main/java/com/kamco/cd/training/train/service/TmpDatasetService.java b/src/main/java/com/kamco/cd/training/train/service/TmpDatasetService.java index dbbbd45..0b7547b 100644 --- a/src/main/java/com/kamco/cd/training/train/service/TmpDatasetService.java +++ b/src/main/java/com/kamco/cd/training/train/service/TmpDatasetService.java @@ -21,186 +21,68 @@ public class TmpDatasetService { private String trainBaseDir; /** - * train, val, test 폴더별로 link - * - * @param uid 임시폴더 uuid - * @param type train, val, test - * @param links tif pull path - * @throws IOException - */ - public void buildTmpDatasetHardlink(String uid, String type, List links) - throws IOException { - - if (links == null || links.isEmpty()) { - throw new IOException("links is empty"); - } - - Path tmp = Path.of(trainBaseDir, "tmp", uid); - - long hardlinksMade = 0; - - for (ModelTrainLinkDto dto : links) { - - if (type == null) { - log.warn("SKIP - trainType null: {}", dto); - continue; - } - - // type별 디렉토리 생성 - Files.createDirectories(tmp.resolve(type).resolve("input1")); - Files.createDirectories(tmp.resolve(type).resolve("input2")); - Files.createDirectories(tmp.resolve(type).resolve("label")); - Files.createDirectories(tmp.resolve(type).resolve("label-json")); - - // comparePath → input1 - hardlinksMade += link(tmp, type, "input1", dto.getComparePath()); - - // targetPath → input2 - hardlinksMade += link(tmp, type, "input2", dto.getTargetPath()); - - // labelPath → label - hardlinksMade += link(tmp, type, "label", dto.getLabelPath()); - - // geoJsonPath -> label-json - hardlinksMade += link(tmp, type, "label-json", dto.getGeoJsonPath()); - } - - if (hardlinksMade == 0) { - throw new IOException("No hardlinks created."); - } - - log.info("tmp dataset created: {}, hardlinksMade={}", tmp, hardlinksMade); - } - - private long link(Path tmp, String type, String part, String fullPath) throws IOException { - - if (fullPath == null || fullPath.isBlank()) return 0; - - Path src = Path.of(fullPath); - - if (!Files.isRegularFile(src)) { - log.warn("SKIP (not file): {}", src); - return 0; - } - - String fileName = src.getFileName().toString(); - Path dst = tmp.resolve(type).resolve(part).resolve(fileName); - - // 충돌 시 덮어쓰기 - if (Files.exists(dst)) { - log.warn("COLLISION overwrite: dst={} src={}", dst, src); - Files.delete(dst); - } - - Files.createLink(dst, src); - - return 1; - } - - private String safe(String s) { - return (s == null || s.isBlank()) ? null : s.trim(); - } - - /** - * request 전체 폴더 link + * 다른 데이터셋 파일과 이름이 겹치면 그 파일은 skip함 * * @param uid - * @param datasetUids + * @param type + * @param links * @return * @throws IOException */ - public String buildTmpDatasetSymlink(String uid, List datasetUids) throws IOException { + public String buildTmpDatasetSymlink(String uid, String type, List links) + throws IOException { - log.info("========== buildTmpDatasetHardlink START =========="); - log.info("uid={}", uid); - log.info("datasetUids={}", datasetUids); - log.info("requestDir(raw)={}", requestDir); + if (uid == null || uid.isBlank()) throw new IOException("uid is empty"); + if (type == null || type.isBlank()) throw new IOException("type is empty"); + if (links == null || links.isEmpty()) throw new IOException("links is empty"); + + log.info("========== buildTmpDatasetHardlink MERGE START =========="); + log.info("uid={}, type={}, links.size={}", uid, type, links.size()); Path BASE = toPath(requestDir); Path tmp = Path.of(trainBaseDir, "tmp", uid); - log.info("BASE={}", BASE); - log.info("BASE exists? {}", Files.isDirectory(BASE)); - log.info("tmp={}", tmp); + long hardlinksMade = 0; + long skippedCollision = 0; + long noDir = 0; - long noDir = 0, scannedDirs = 0, regularFiles = 0, hardlinksMade = 0; + // tmp// 준비 + for (String part : List.of("input1", "input2", "label", "label-json")) { + Files.createDirectories(tmp.resolve(type).resolve(part)); + } + + for (ModelTrainLinkDto dto : links) { + String datasetUid = safe(dto.getDatasetUid()); + if (datasetUid == null) { + log.warn("SKIP dto (datasetUid null): {}", dto); + continue; + } + + Path srcRoot = BASE.resolve(datasetUid); - // tmp 디렉토리 준비 - for (String type : List.of("train", "val", "test")) { for (String part : List.of("input1", "input2", "label", "label-json")) { - Path dir = tmp.resolve(type).resolve(part); - Files.createDirectories(dir); - log.info("createDirectories: {}", dir); - } - } - // 하드링크는 "같은 파일시스템"에서만 가능하므로 BASE/tmp가 같은 FS인지 미리 확인(권장) - try { - var baseStore = Files.getFileStore(BASE); - var tmpStore = Files.getFileStore(tmp.getParent()); // BASE/tmp - if (!baseStore.name().equals(tmpStore.name()) || !baseStore.type().equals(tmpStore.type())) { - throw new IOException( - "Hardlink requires same filesystem. baseStore=" - + baseStore.name() - + "(" - + baseStore.type() - + "), tmpStore=" - + tmpStore.name() - + "(" - + tmpStore.type() - + ")"); - } - } catch (Exception e) { - // FileStore 비교가 환경마다 애매할 수 있어서, 여기서는 경고만 주고 실제 createLink에서 최종 판단하게 둘 수도 있음. - log.warn("FileStore check skipped/failed (will rely on createLink): {}", e.toString()); - } + Path srcDir = srcRoot.resolve(type).resolve(part); + if (!Files.isDirectory(srcDir)) { + noDir++; + continue; + } - for (String id : datasetUids) { - Path srcRoot = BASE.resolve(id); - log.info("---- dataset id={} srcRoot={} exists? {}", id, srcRoot, Files.isDirectory(srcRoot)); + // ✅ 하위폴더까지 전부 + try (var walk = Files.walk(srcDir)) { + for (Path f : walk.filter(Files::isRegularFile).toList()) { - for (String type : List.of("train", "val", "test")) { - for (String part : List.of("input1", "input2", "label", "label-json")) { + String fileName = f.getFileName().toString(); + Path dst = tmp.resolve(type).resolve(part).resolve(fileName); - Path srcDir = srcRoot.resolve(type).resolve(part); - if (!Files.isDirectory(srcDir)) { - log.warn("SKIP (not directory): {}", srcDir); - noDir++; - continue; - } - - scannedDirs++; - log.info("SCAN dir={}", srcDir); - - try (var stream = Files.walk(srcDir)) { - for (Path f : stream.filter(Files::isRegularFile).toList()) { - if (!Files.isRegularFile(f)) { - log.debug("skip non-regular file: {}", f); - continue; - } - - regularFiles++; - - String dstName = f.getFileName().toString(); - Path dst = tmp.resolve(type).resolve(part).resolve(dstName); - - // dst가 남아있으면 삭제(심볼릭링크든 파일이든) 하고 다시만듬 - if (Files.exists(dst) || Files.isSymbolicLink(dst)) { - Files.delete(dst); - log.debug("deleted existing: {}", dst); - } - - try { - // 하드링크 생성 (dst가 새 파일로 생기지만 inode는 f와 동일) - Files.createLink(dst, f); - hardlinksMade++; - log.debug("created hardlink: {} => {}", dst, f); - } catch (IOException e) { - // 여기서 바로 실패시키면 “tmp는 만들었는데 내용은 0개” 같은 상태를 방지할 수 있음 - log.error("FAILED create hardlink: {} => {}", dst, f, e); - throw e; - } + // ✅ 이름 유지 + 충돌은 skip + if (Files.exists(dst)) { + skippedCollision++; + continue; } + + Files.createLink(dst, f); + hardlinksMade++; } } } @@ -208,29 +90,35 @@ public class TmpDatasetService { if (hardlinksMade == 0) { throw new IOException( - "No hardlinks created. regularFiles=" - + regularFiles - + ", scannedDirs=" - + scannedDirs - + ", noDir=" - + noDir); + "No hardlinks created. noDir=" + noDir + ", skippedCollision=" + skippedCollision); } - log.info("tmp dataset created: {}", tmp); log.info( - "summary: scannedDirs={}, noDir={}, regularFiles={}, hardlinksMade={}", - scannedDirs, - noDir, - regularFiles, - hardlinksMade); + "tmp dataset merged: {} (type={}), hardlinksMade={}, skippedCollision={}, noDir={}", + tmp, + type, + hardlinksMade, + skippedCollision, + noDir); return uid; } private static Path toPath(String p) { - if (p.startsWith("~/")) { - return Paths.get(System.getProperty("user.home")).resolve(p.substring(2)).normalize(); + if (p == null || p.isBlank()) { + throw new IllegalArgumentException("path is null or blank"); } - return Paths.get(p).toAbsolutePath().normalize(); + String trimmed = p.trim(); + if (trimmed.startsWith("~/")) { + return Paths.get(System.getProperty("user.home")) + .resolve(trimmed.substring(2)) + .toAbsolutePath() + .normalize(); + } + return Paths.get(trimmed).toAbsolutePath().normalize(); + } + + private static String safe(String s) { + return (s == null || s.isBlank()) ? null : s.trim(); } } diff --git a/src/main/java/com/kamco/cd/training/train/service/TrainJobService.java b/src/main/java/com/kamco/cd/training/train/service/TrainJobService.java index b5dd84e..c519fd2 100644 --- a/src/main/java/com/kamco/cd/training/train/service/TrainJobService.java +++ b/src/main/java/com/kamco/cd/training/train/service/TrainJobService.java @@ -274,11 +274,11 @@ public class TrainJobService { List testList = modelTrainMngCoreService.findDatasetTestPath(modelId); // train 데이터셋 심볼링크 생성 - tmpDatasetService.buildTmpDatasetHardlink(raw, "train", trainList); + tmpDatasetService.buildTmpDatasetSymlink(raw, "train", trainList); // val 데이터셋 심볼링크 생성 - tmpDatasetService.buildTmpDatasetHardlink(raw, "val", valList); + tmpDatasetService.buildTmpDatasetSymlink(raw, "val", valList); // test 데이터셋 심볼링크 생성 - tmpDatasetService.buildTmpDatasetHardlink(raw, "test", testList); + tmpDatasetService.buildTmpDatasetSymlink(raw, "test", testList); ModelTrainMngDto.UpdateReq updateReq = new ModelTrainMngDto.UpdateReq(); updateReq.setRequestPath(raw);