diff --git a/kamco-make-dataset-generation/build/libs/generator-dataset-for-training.jar b/kamco-make-dataset-generation/build/libs/generator-dataset-for-training.jar index cd0764b..1222b88 100644 Binary files a/kamco-make-dataset-generation/build/libs/generator-dataset-for-training.jar and b/kamco-make-dataset-generation/build/libs/generator-dataset-for-training.jar differ diff --git a/kamco-make-dataset-generation/src/main/java/com/kamco/cd/geojsonscheduler/GeoJsonSchedulerApplication.java b/kamco-make-dataset-generation/src/main/java/com/kamco/cd/geojsonscheduler/GeoJsonSchedulerApplication.java index 3165867..1e90fae 100644 --- a/kamco-make-dataset-generation/src/main/java/com/kamco/cd/geojsonscheduler/GeoJsonSchedulerApplication.java +++ b/kamco-make-dataset-generation/src/main/java/com/kamco/cd/geojsonscheduler/GeoJsonSchedulerApplication.java @@ -1,12 +1,13 @@ package com.kamco.cd.geojsonscheduler; import com.kamco.cd.geojsonscheduler.config.DockerProperties; +import com.kamco.cd.geojsonscheduler.config.TrainDockerProperties; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.context.properties.EnableConfigurationProperties; @SpringBootApplication -@EnableConfigurationProperties(DockerProperties.class) +@EnableConfigurationProperties({DockerProperties.class, TrainDockerProperties.class}) public class GeoJsonSchedulerApplication { public static void main(String[] args) { diff --git a/kamco-make-dataset-generation/src/main/java/com/kamco/cd/geojsonscheduler/batch/ExportGeoJsonTasklet.java b/kamco-make-dataset-generation/src/main/java/com/kamco/cd/geojsonscheduler/batch/ExportGeoJsonTasklet.java deleted file mode 100644 index 2931dfe..0000000 --- a/kamco-make-dataset-generation/src/main/java/com/kamco/cd/geojsonscheduler/batch/ExportGeoJsonTasklet.java +++ /dev/null @@ -1,152 +0,0 @@ -package com.kamco.cd.geojsonscheduler.batch; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.SerializationFeature; -import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.AnalCntInfo; -import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.AnalMapSheetList; -import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.CompleteLabelData; -import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.CompleteLabelData.GeoJsonFeature; -import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.FeatureCollection; -import com.kamco.cd.geojsonscheduler.repository.TrainingDataReviewJobRepository; -import com.kamco.cd.geojsonscheduler.service.DockerRunnerService; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.List; -import java.util.Objects; -import lombok.RequiredArgsConstructor; -import lombok.extern.log4j.Log4j2; -import org.springframework.batch.core.StepContribution; -import org.springframework.batch.core.scope.context.ChunkContext; -import org.springframework.batch.core.step.tasklet.Tasklet; -import org.springframework.batch.repeat.RepeatStatus; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.stereotype.Component; - -@Log4j2 -@Component -@RequiredArgsConstructor -public class ExportGeoJsonTasklet implements Tasklet { - - private final TrainingDataReviewJobRepository repository; - private final DockerRunnerService dockerRunnerService; - - @Value("${training-data.geojson-dir}") - private String trainingDataDir; - - @Override - public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) { - log.info("========================================"); - log.info("배치 작업 시작"); - log.info("========================================"); - - // 1. StepContext를 통해 바로 가져오기 (가장 추천) - String jobName = chunkContext.getStepContext().getJobName(); - log.info("Job Name: {}", jobName); - - // 진행중인 회차 중, complete_cnt 가 존재하는 회차 목록 가져오기 - log.info("진행중인 회차 목록 조회 중..."); - List analList = repository.findAnalCntInfoList(); - log.info("진행중인 회차 수: {}", analList.size()); - - int processedAnalCount = 0; - for (AnalCntInfo info : analList) { - log.info("----------------------------------------"); - log.info("회차 처리 중: AnalUid={}, ResultUid={}", info.getAnalUid(), info.getResultUid()); - log.info("전체 건수: {}, 파일 건수: {}", info.getAllCnt(), info.getFileCnt()); - - if (Objects.equals(info.getAllCnt(), info.getFileCnt())) { - log.info("모든 파일이 이미 처리됨. 건너뜀."); - continue; - } - - //추론 ID - String resultUid = info.getResultUid(); - log.info("ResultUid: {}", resultUid); - - //insert 하기 jobname, resultUid , 시작시간 - // 어제까지 검수 완료된 총 데이터의 도엽별 목록 가져오기 - log.info("검수 완료된 도엽 목록 조회 중... (AnalUid={})", info.getAnalUid()); - List analMapList = repository.findCompletedAnalMapSheetList(info.getAnalUid()); - log.info("검수 완료된 도엽 수: {}", analMapList.size()); - - //TODO 도엽이 4개이상 존재할때 만 RUN 하기 - if (analMapList.isEmpty()) { - log.warn("검수 완료된 도엽이 없음. 건너뜀."); - continue; - } - - //insert 하기 jobname, resultUid , 시작시간 - boolean anyProcessed = false; - int processedMapSheetCount = 0; - int totalGeoJsonFiles = 0; - - for (AnalMapSheetList mapSheet : analMapList) { - log.info(" 도엽 처리 중: MapSheetNum={}", mapSheet.getMapSheetNum()); - - //도엽별 geom 데이터 가지고 와서 geojson 만들기 - List completeList = - repository.findCompletedYesterdayLabelingList( - info.getAnalUid(), mapSheet.getMapSheetNum()); - log.info(" 완료된 라벨링 데이터 수: {}", completeList.size()); - - if (!completeList.isEmpty()) { - List geoUids = completeList.stream().map(CompleteLabelData::getGeoUid).toList(); - log.info(" GeoUID 목록 생성 완료: {} 건", geoUids.size()); - - List features = completeList.stream().map(GeoJsonFeature::from).toList(); - log.info(" GeoJSON Feature 변환 완료: {} 개", features.size()); - - FeatureCollection collection = new FeatureCollection(features); - String filename = mapSheet.buildFilename(resultUid); - log.info(" GeoJSON 파일명: {}", filename); - - // 형식 /kamco-nfs/dataset/request/uuid/filename - Path outputPath = Paths.get(trainingDataDir + File.separator + "request" + File.separator + resultUid, filename); - log.info(" 출력 경로: {}", outputPath); - - try { - Files.createDirectories(outputPath.getParent()); - log.info(" 디렉토리 생성 완료: {}", outputPath.getParent()); - - ObjectMapper objectMapper = new ObjectMapper(); - objectMapper.enable(SerializationFeature.INDENT_OUTPUT); - objectMapper.writeValue(outputPath.toFile(), collection); - log.info(" GeoJSON 파일 저장 완료: {}", outputPath); - - repository.updateLearnDataGeomFileCreateYn(geoUids); - log.info(" DB 업데이트 완료: {} 건", geoUids.size()); - - anyProcessed = true; - processedMapSheetCount++; - totalGeoJsonFiles++; - } catch (IOException e) { - log.error(" GeoJSON 파일 생성 실패: {}", e.getMessage(), e); - } - } - } - - log.info("회차 처리 완료: ResultUid={}", resultUid); - log.info(" 처리된 도엽 수: {}", processedMapSheetCount); - log.info(" 생성된 GeoJSON 파일 수: {}", totalGeoJsonFiles); - - if (anyProcessed) { - log.info("Docker 컨테이너 실행 중... (ResultUid={})", resultUid); - dockerRunnerService.run(resultUid); - log.info("Docker 컨테이너 실행 완료 (ResultUid={})", resultUid); - processedAnalCount++; - } else { - log.warn("처리된 도엽이 없어 Docker 실행 건너뜀 (ResultUid={})", resultUid); - } - } - - log.info("========================================"); - log.info("배치 작업 완료"); - log.info("처리된 회차 수: {}", processedAnalCount); - log.info("========================================"); - - return RepeatStatus.FINISHED; - } -} diff --git a/kamco-make-dataset-generation/src/main/resources/application-prod.yml b/kamco-make-dataset-generation/src/main/resources/application-prod.yml index 707a18c..9eae809 100644 --- a/kamco-make-dataset-generation/src/main/resources/application-prod.yml +++ b/kamco-make-dataset-generation/src/main/resources/application-prod.yml @@ -9,3 +9,20 @@ spring: training-data: geojson-dir: /kamco-nfs/dataset + +# Train Model Docker Configuration +train-data: + docker: + image: kamco-cd-train:latest + data-volume: /kamco-nfs/dataset:/data + checkpoints-volume: /kamco-nfs/checkpoints:/checkpoints + dataset-folder: /data/dataset + output-folder: /data/output + input-size: "512" + crop-size: "256" + batch-size: 8 + gpu-ids: "0,1,2,3" + gpus: 4 + lr: "0.001" + backbone: resnet50 + epochs: 100