add make dataset

This commit is contained in:
2026-02-08 20:21:57 +09:00
parent 5bfde5798f
commit 045e3da923
101 changed files with 4039 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
package com.kamco.cd.geojsonscheduler;
import com.kamco.cd.geojsonscheduler.config.DockerProperties;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
@SpringBootApplication
@EnableConfigurationProperties(DockerProperties.class)
public class GeoJsonSchedulerApplication {

    /**
     * Entry point: boots the Spring context for the GeoJSON export scheduler,
     * binding {@link DockerProperties} from configuration.
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(GeoJsonSchedulerApplication.class);
        application.run(args);
    }
}

View File

@@ -0,0 +1,32 @@
package com.kamco.cd.geojsonscheduler.batch;
import lombok.RequiredArgsConstructor;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.job.builder.JobBuilder;
import org.springframework.batch.core.repository.JobRepository;
import org.springframework.batch.core.step.builder.StepBuilder;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.transaction.PlatformTransactionManager;
@Configuration
@RequiredArgsConstructor
public class ExportGeoJsonJobConfig {

    private final JobRepository jobRepository;
    private final PlatformTransactionManager transactionManager;
    private final ExportGeoJsonTasklet exportGeoJsonTasklet;

    /** Single-step batch job that exports completed labeling data as GeoJSON. */
    @Bean
    public Job exportGeoJsonJob() {
        JobBuilder jobBuilder = new JobBuilder("exportGeoJsonJob", jobRepository);
        return jobBuilder.start(exportGeoJsonStep()).build();
    }

    /** Tasklet-based step wrapping {@link ExportGeoJsonTasklet}. */
    @Bean
    public Step exportGeoJsonStep() {
        StepBuilder stepBuilder = new StepBuilder("exportGeoJsonStep", jobRepository);
        return stepBuilder.tasklet(exportGeoJsonTasklet, transactionManager).build();
    }
}

View File

@@ -0,0 +1,100 @@
package com.kamco.cd.geojsonscheduler.batch;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.AnalCntInfo;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.AnalMapSheetList;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.CompleteLabelData;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.CompleteLabelData.GeoJsonFeature;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.FeatureCollection;
import com.kamco.cd.geojsonscheduler.repository.TrainingDataReviewJobRepository;
import com.kamco.cd.geojsonscheduler.service.DockerRunnerService;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
@Log4j2
@Component
@RequiredArgsConstructor
public class ExportGeoJsonTasklet implements Tasklet {
private final TrainingDataReviewJobRepository repository;
private final DockerRunnerService dockerRunnerService;
@Value("${training-data.geojson-dir}")
private String trainingDataDir;
@Override
public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) {
// 진행중인 회차 중, complete_cnt 가 존재하는 회차 목록 가져오기
List<AnalCntInfo> analList = repository.findAnalCntInfoList();
for (AnalCntInfo info : analList) {
if (Objects.equals(info.getAllCnt(), info.getFileCnt())) {
continue;
}
String resultUid = info.getResultUid();
// 어제까지 검수 완료된 총 데이터의 도엽별 목록 가져오기
List<AnalMapSheetList> analMapList = repository.findCompletedAnalMapSheetList(info.getAnalUid());
//TODO 도엽이 4개이상 존재할때 만 RUN 하기
if (analMapList.isEmpty()) {
continue;
}
boolean anyProcessed = false;
for (AnalMapSheetList mapSheet : analMapList) {
//도엽별 geom 데이터 가지고 와서 geojson 만들기
List<CompleteLabelData> completeList =
repository.findCompletedYesterdayLabelingList(
info.getAnalUid(), mapSheet.getMapSheetNum());
if (!completeList.isEmpty()) {
List<Long> geoUids = completeList.stream().map(CompleteLabelData::getGeoUid).toList();
List<GeoJsonFeature> features = completeList.stream().map(GeoJsonFeature::from).toList();
FeatureCollection collection = new FeatureCollection(features);
String filename = mapSheet.buildFilename(resultUid);
// 형식 /kamco-nfs/dataset/request/uuid/filename
Path outputPath = Paths.get(trainingDataDir + File.separator + "request" + File.separator + resultUid, filename);
try {
Files.createDirectories(outputPath.getParent());
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
objectMapper.writeValue(outputPath.toFile(), collection);
repository.updateLearnDataGeomFileCreateYn(geoUids);
anyProcessed = true;
} catch (IOException e) {
log.error(e.getMessage());
}
}
}
if (anyProcessed) {
dockerRunnerService.run(resultUid);
}
}
return RepeatStatus.FINISHED;
}
}

View File

@@ -0,0 +1,23 @@
package com.kamco.cd.geojsonscheduler.config;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
@Getter
@Setter
// Bound from the "training-data.docker" section of application YAML; used by
// DockerRunnerService to assemble the `docker run` command line.
@ConfigurationProperties(prefix = "training-data.docker")
public class DockerProperties {
// Docker image name (e.g. kamco-cd-dataset:latest).
private String image;
// Value for `docker run --user` (uid:gid).
private String user;
// Bind-mount spec for the dataset volume (host:container form).
private String datasetVolume;
// Bind-mount spec for the images volume (host:container form).
private String imagesVolume;
// Passed through as --input_root to the pipeline script.
private String inputRoot;
// Passed through as --output_root to the pipeline script.
private String outputRoot;
// Patch size in pixels for the dataset pipeline (--patch_size).
private int patchSize;
// Patch overlap percentage (--overlap_pct).
private int overlapPct;
// Train/val/test split ratios, forwarded as separate CLI args (e.g. "0.7" "0.2" "0.1").
private List<String> trainValTestRatio;
// Fraction of empty patches to keep (--keep_empty_ratio).
private double keepEmptyRatio;
}

View File

@@ -0,0 +1,121 @@
package com.kamco.cd.geojsonscheduler.dto;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.CompleteLabelData.GeoJsonFeature;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
/** DTOs for the training-data review/export batch job. */
public class TrainingDataReviewJobDto {

    /** Per-round aggregate counts (result of findAnalCntInfoList). */
    @Getter
    @Setter
    @RequiredArgsConstructor
    @AllArgsConstructor
    public static class AnalCntInfo {
        Long analUid;
        String resultUid;
        // Rows in UNCONFIRM/COMPLETE state, or with NULL state (see repository SQL).
        Long allCnt;
        // Rows marked COMPLETE.
        Long completeCnt;
        // Rows whose GeoJSON file was already generated (file_create_yn = true).
        Long fileCnt;
    }

    /** One map-sheet group of a round, used to name its GeoJSON output file. */
    @Getter
    @Builder
    @AllArgsConstructor
    public static class AnalMapSheetList {
        private Integer compareYyyy;
        private Integer targetYyyy;
        private String mapSheetNum;

        /**
         * Builds the output filename: {@code <uid8>_<compare>_<target>_<sheet>_D15.geojson}.
         *
         * <p>NOTE(review): assumes {@code resultUid} has at least 8 characters —
         * {@code substring(0, 8)} throws otherwise; confirm the UID format upstream.
         */
        public String buildFilename(String resultUid) {
            return String.format(
                "%s_%s_%s_%s_D15.geojson",
                resultUid.substring(0, 8),
                compareYyyy,
                targetYyyy,
                mapSheetNum);
        }
    }

    /** Root GeoJSON object serialized to disk ({@code "type": "FeatureCollection"}). */
    @Getter
    @Setter
    @JsonPropertyOrder({"type", "features"})
    public static class FeatureCollection {
        private final String type = "FeatureCollection";
        private List<GeoJsonFeature> features;

        public FeatureCollection(List<GeoJsonFeature> features) {
            this.features = features;
        }
    }

    /** One completed labeling row with its geometry parsed from GeoJSON text. */
    @Getter
    @Setter
    @JsonPropertyOrder({"type", "geometry", "properties"})
    public static class CompleteLabelData {

        // ObjectMapper is thread-safe and costly to construct; the original built a
        // new mapper for every row — share a single instance instead.
        private static final ObjectMapper GEOM_MAPPER = new ObjectMapper();

        private Long geoUid;
        private String type;
        @JsonIgnore private String geomStr;
        private JsonNode geometry;
        private Properties properties;

        /**
         * @param geomStr GeoJSON geometry text (e.g. from ST_AsGeoJSON); may be null
         * @throws RuntimeException if {@code geomStr} is present but not valid JSON
         */
        public CompleteLabelData(Long geoUid, String type, String geomStr, Properties properties) {
            this.geoUid = geoUid;
            this.type = type;
            this.geomStr = geomStr;
            JsonNode jsonNode = null;
            try {
                if (geomStr != null) {
                    jsonNode = GEOM_MAPPER.readTree(geomStr);
                }
            } catch (JsonProcessingException e) {
                throw new RuntimeException(e);
            }
            // PostGIS can embed a "crs" member, which RFC 7946 GeoJSON does not allow; drop it.
            if (jsonNode != null && jsonNode.isObject()) {
                ((ObjectNode) jsonNode).remove("crs");
            }
            this.geometry = jsonNode;
            this.properties = properties;
        }

        /** Feature-level properties written into the GeoJSON output. */
        @Getter
        @Setter
        @RequiredArgsConstructor
        @AllArgsConstructor
        public static class Properties {
            private String modelId;
            private String before;
            private String after;
        }

        /** One GeoJSON Feature; projection of {@link CompleteLabelData} for serialization. */
        @Getter
        @AllArgsConstructor
        public static class GeoJsonFeature {
            private String type;
            private JsonNode geometry;
            private Properties properties;

            public static GeoJsonFeature from(CompleteLabelData data) {
                return new GeoJsonFeature(data.getType(), data.getGeometry(), data.getProperties());
            }
        }
    }
}

View File

@@ -0,0 +1,18 @@
package com.kamco.cd.geojsonscheduler.enums;
import lombok.AllArgsConstructor;
import lombok.Getter;
@Getter
@AllArgsConstructor
// Inspection states stored in tb_labeling_assignment.inspect_state (see repository SQL).
public enum InspectState {
// "미확인" = unconfirmed / not yet inspected.
UNCONFIRM("미확인"),
// "제외" = excluded from the dataset.
EXCEPT("제외"),
// "완료" = inspection complete; eligible for GeoJSON export.
COMPLETE("완료");
// Korean display label for the state.
private final String desc;
// Identifier used in persistence/serialization; simply the enum constant name.
public String getId() {
return name();
}
}

View File

@@ -0,0 +1,19 @@
package com.kamco.cd.geojsonscheduler.enums;
import lombok.AllArgsConstructor;
import lombok.Getter;
@Getter
@AllArgsConstructor
// Lifecycle states of a labeling management task.
public enum LabelMngState {
// "작업대기" = waiting to be worked on.
PENDING("작업대기"),
// "작업할당" = assigned to a worker.
ASSIGNED("작업할당"),
// "진행중" = in progress.
ING("진행중"),
// "종료" = finished/closed.
FINISH("종료");
// Korean display label for the state.
private final String desc;
// Identifier used in persistence/serialization; simply the enum constant name.
public String getId() {
return name();
}
}

View File

@@ -0,0 +1,127 @@
package com.kamco.cd.geojsonscheduler.repository;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.AnalCntInfo;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.AnalMapSheetList;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.CompleteLabelData;
import com.kamco.cd.geojsonscheduler.dto.TrainingDataReviewJobDto.CompleteLabelData.Properties;
import java.sql.Timestamp;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.List;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;
@Repository
@RequiredArgsConstructor
public class TrainingDataReviewJobRepository {

    // Business timezone for "before today" cutoffs.
    private static final ZoneId SEOUL = ZoneId.of("Asia/Seoul");

    private final JdbcTemplate jdbcTemplate;

    /**
     * Rounds whose inference is in progress (anal_state = 'ING') and that have at least one
     * COMPLETE inspection, with per-round counts of total / completed / already-exported rows.
     */
    public List<AnalCntInfo> findAnalCntInfoList() {
        String sql =
            """
            SELECT
            la.anal_uid,
            msl.uid AS result_uid,
            SUM(CASE WHEN la.inspect_state IN ('UNCONFIRM', 'COMPLETE') OR la.inspect_state IS NULL THEN 1 ELSE 0 END) AS all_cnt,
            SUM(CASE WHEN la.inspect_state = 'COMPLETE' THEN 1 ELSE 0 END) AS complete_cnt,
            SUM(CASE WHEN mslg.file_create_yn = true THEN 1 ELSE 0 END) AS file_cnt
            FROM tb_labeling_assignment la
            INNER JOIN tb_map_sheet_anal_inference msai ON la.anal_uid = msai.anal_uid AND msai.anal_state = 'ING'
            LEFT JOIN tb_map_sheet_learn msl ON msai.learn_id = msl.id
            LEFT JOIN tb_map_sheet_learn_data_geom mslg ON la.inference_geom_uid = mslg.geo_uid
            GROUP BY la.anal_uid, msl.uid
            HAVING SUM(CASE WHEN la.inspect_state = 'COMPLETE' THEN 1 ELSE 0 END) > 0
            """;
        return jdbcTemplate.query(
            sql,
            (rs, rowNum) ->
                new AnalCntInfo(
                    rs.getLong("anal_uid"),
                    rs.getString("result_uid"),
                    rs.getLong("all_cnt"),
                    rs.getLong("complete_cnt"),
                    rs.getLong("file_cnt")));
    }

    /**
     * Map-sheet groups of the given round whose inspection completed strictly before today
     * (Asia/Seoul), i.e. up to the end of yesterday.
     */
    public List<AnalMapSheetList> findCompletedAnalMapSheetList(Long analUid) {
        String sql =
            """
            SELECT
            msai.compare_yyyy,
            msai.target_yyyy,
            la.assign_group_id
            FROM tb_labeling_assignment la
            INNER JOIN tb_map_sheet_anal_inference msai ON la.anal_uid = msai.anal_uid
            WHERE la.anal_uid = ?
            AND la.inspect_state = 'COMPLETE'
            AND la.inspect_stat_dttm < ?
            GROUP BY msai.compare_yyyy, msai.target_yyyy, la.assign_group_id
            """;
        return jdbcTemplate.query(
            sql,
            (rs, rowNum) ->
                AnalMapSheetList.builder()
                    .compareYyyy(rs.getInt("compare_yyyy"))
                    .targetYyyy(rs.getInt("target_yyyy"))
                    .mapSheetNum(rs.getString("assign_group_id"))
                    .build(),
            analUid,
            startOfTodaySeoul());
    }

    /**
     * Completed labeling rows of one map-sheet group (inspected before today, Asia/Seoul),
     * with geometry as GeoJSON text and a model id derived from the after-class code.
     */
    public List<CompleteLabelData> findCompletedYesterdayLabelingList(
        Long analUid, String mapSheetNum) {
        String sql =
            """
            SELECT
            mslg.geo_uid,
            'Feature' AS type,
            ST_AsGeoJSON(mslg.geom) AS geom_str,
            CASE
            WHEN mslg.class_after_cd IN ('building', 'container') THEN 'M1'
            WHEN mslg.class_after_cd = 'waste' THEN 'M2'
            ELSE 'M3'
            END AS model_id,
            mslg.class_before_cd,
            mslg.class_after_cd
            FROM tb_labeling_assignment la
            LEFT JOIN tb_map_sheet_learn_data_geom mslg ON la.inference_geom_uid = mslg.geo_uid
            WHERE la.anal_uid = ?
            AND la.assign_group_id = ?
            AND la.inspect_state = 'COMPLETE'
            AND la.inspect_stat_dttm < ?
            """;
        return jdbcTemplate.query(
            sql,
            (rs, rowNum) ->
                new CompleteLabelData(
                    rs.getLong("geo_uid"),
                    rs.getString("type"),
                    rs.getString("geom_str"),
                    new Properties(
                        rs.getString("model_id"),
                        rs.getString("class_before_cd"),
                        rs.getString("class_after_cd"))),
            analUid,
            mapSheetNum,
            startOfTodaySeoul());
    }

    /** Marks the given geometry rows as exported (file_create_yn = true). */
    public void updateLearnDataGeomFileCreateYn(List<Long> geoUids) {
        // Guard: an empty list would produce "IN ()", which is invalid SQL.
        if (geoUids == null || geoUids.isEmpty()) {
            return;
        }
        String placeholders = geoUids.stream().map(id -> "?").collect(Collectors.joining(","));
        String sql =
            "UPDATE tb_map_sheet_learn_data_geom SET file_create_yn = true, modified_date = NOW()"
                + " WHERE geo_uid IN ("
                + placeholders
                + ")";
        jdbcTemplate.update(sql, geoUids.toArray());
    }

    // Start of today in Asia/Seoul as a Timestamp; rows with inspect_stat_dttm strictly
    // before this instant (i.e. through the end of yesterday) are selected.
    private static Timestamp startOfTodaySeoul() {
        return Timestamp.from(LocalDate.now(SEOUL).atStartOfDay(SEOUL).toInstant());
    }
}

View File

@@ -0,0 +1,84 @@
package com.kamco.cd.geojsonscheduler.service;
import com.kamco.cd.geojsonscheduler.config.DockerProperties;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.springframework.stereotype.Service;
@Log4j2
@Service
@RequiredArgsConstructor
public class DockerRunnerService {
private final DockerProperties dockerProperties;
public void run(String resultUid) {
List<String> command = buildCommand(resultUid);
log.info("Running docker command: {}", String.join(" ", command));
try {
ProcessBuilder pb = new ProcessBuilder(command);
pb.redirectErrorStream(true);
Process process = pb.start();
try (BufferedReader reader =
new BufferedReader(new InputStreamReader(process.getInputStream()))) {
String line;
while ((line = reader.readLine()) != null) {
log.info("[docker] {}", line);
}
}
int exitCode = process.waitFor();
if (exitCode != 0) {
log.error("Docker process exited with code {} for resultUid: {}", exitCode, resultUid);
} else {
log.info("Docker process completed successfully for resultUid: {}", resultUid);
}
} catch (IOException e) {
log.error("Failed to run docker command for resultUid {}: {}", resultUid, e.getMessage());
} catch (InterruptedException e) {
log.error("Docker process interrupted for resultUid {}: {}", resultUid, e.getMessage());
Thread.currentThread().interrupt();
}
}
private List<String> buildCommand(String resultUid) {
List<String> cmd = new ArrayList<>();
cmd.add("docker");
cmd.add("run");
cmd.add("--rm");
cmd.add("--user");
cmd.add(dockerProperties.getUser());
cmd.add("-v");
cmd.add(dockerProperties.getDatasetVolume());
cmd.add("-v");
cmd.add(dockerProperties.getImagesVolume());
cmd.add("--entrypoint");
cmd.add("python");
cmd.add(dockerProperties.getImage());
cmd.add("code/kamco_full_pipeline.py");
cmd.add("--labelling-folder");
cmd.add("request/" + resultUid);
cmd.add("--output-folder");
cmd.add("response/" + resultUid);
cmd.add("--input_root");
cmd.add(dockerProperties.getInputRoot());
cmd.add("--output_root");
cmd.add(dockerProperties.getOutputRoot());
cmd.add("--patch_size");
cmd.add(String.valueOf(dockerProperties.getPatchSize()));
cmd.add("--overlap_pct");
cmd.add(String.valueOf(dockerProperties.getOverlapPct()));
cmd.add("--train_val_test_ratio");
cmd.addAll(dockerProperties.getTrainValTestRatio());
cmd.add("--keep_empty_ratio");
cmd.add(String.valueOf(dockerProperties.getKeepEmptyRatio()));
return cmd;
}
}

View File

@@ -0,0 +1,11 @@
spring:
  datasource:
    # NOTE(review): plaintext DB credentials committed to VCS — move to environment
    # variables or a secret store and rotate this password.
    url: jdbc:postgresql://192.168.2.127:15432/kamco_cds
    username: kamco_cds
    password: kamco_cds_Q!W@E#R$
    hikari:
      minimum-idle: 2
      maximum-pool-size: 5
training-data:
  # Root directory for generated GeoJSON files (bound to ExportGeoJsonTasklet).
  geojson-dir: /kamco-nfs/dataset

View File

@@ -0,0 +1,8 @@
spring:
  datasource:
    # Local development database.
    url: jdbc:postgresql://localhost:5432/kamco_cds
    username: kamco_cds
    password: kamco_cds
training-data:
  # Root directory for generated GeoJSON files (bound to ExportGeoJsonTasklet).
  geojson-dir: /tmp/geojson

View File

@@ -0,0 +1,11 @@
spring:
  datasource:
    # NOTE(review): plaintext DB credentials committed to VCS — move to environment
    # variables or a secret store and rotate this password.
    url: jdbc:postgresql://127.0.0.1:15432/kamco_cds
    username: kamco_cds
    password: kamco_cds_Q!W@E#R$
    hikari:
      minimum-idle: 2
      maximum-pool-size: 5
training-data:
  # Root directory for generated GeoJSON files (bound to ExportGeoJsonTasklet).
  geojson-dir: /kamco-nfs/dataset

View File

@@ -0,0 +1,33 @@
spring:
  application:
    name: kamco-geojson-scheduler
  profiles:
    # Default profile; overridden per environment.
    active: local
  datasource:
    driver-class-name: org.postgresql.Driver
    hikari:
      minimum-idle: 2
      maximum-pool-size: 2
      connection-timeout: 20000
      idle-timeout: 300000
      max-lifetime: 1800000
  batch:
    job:
      # Run the batch job on application startup.
      enabled: true
    # Spring Batch metadata tables are managed externally; never auto-create them.
    initialize-schema: never
training-data:
  # Bound to DockerProperties (prefix "training-data.docker"); consumed by DockerRunnerService.
  docker:
    image: kamco-cd-dataset:latest
    # uid:gid the container runs as (docker run --user).
    user: "1000:1000"
    # host:container bind mounts.
    dataset-volume: /kamco-nfs/dataset:/dataset
    images-volume: /kamco-nfs/images:/kamco-nfs:ro
    input-root: /dataset
    output-root: /dataset
    patch-size: 512
    overlap-pct: 50
    # Forwarded as separate CLI args to --train_val_test_ratio.
    train-val-test-ratio:
      - "0.7"
      - "0.2"
      - "0.1"
    keep-empty-ratio: 0.1