추론 수정

2026-02-27 09:15:38 +09:00
parent 4629715443
commit fbad8d1cd3
2 changed files with 175 additions and 70 deletions
--- a/src/main/java/com/kamco/cd/kamcoback/common/inference/utils/GeoJsonValidator.java
+++ b/src/main/java/com/kamco/cd/kamcoback/common/inference/utils/GeoJsonValidator.java
@@ -12,154 +12,244 @@ import org.apache.logging.log4j.Logger;
 import org.springframework.http.HttpStatus;
 import org.springframework.web.server.ResponseStatusException;

+/**
+ * GeoJSON 파일의 "features[].properties.scene_id" 값들이 "요청한 도엽번호 목록(requestedMapSheetNums)"과 정확히 일치하는지
+ * 검증하는 유틸.
+ *
+ * <p>핵심 목적: - 요청한 도엽번호를 기반으로 GeoJSON을 생성했는데, 실제 결과 파일에 누락/추가/중복/빈값(scene_id 없음) 등이 발생했는지 빠르게 잡아내기.
+ *
+ * <p>검증 실패 시: - 404: 파일 자체가 없음 - 400: 파일이 비어있거나(0 byte), features 구조가 이상하거나, 요청 목록이 비어있음 - 500: 파일
+ * IO/파싱 자체가 실패(읽기 실패 등) - 422: 정합성(요청 vs 결과)이 맞지 않음 (누락/추가/중복/빈 scene_id 존재)
+ */
 public class GeoJsonValidator {

+  /** GeoJSON 파싱용 ObjectMapper (정적 1개로 재사용) */
  private static final ObjectMapper om = new ObjectMapper();
+
+  /** 로그 출력용 */
  private static final Logger log = LogManager.getLogger(GeoJsonValidator.class);

+  /**
+   * @param geojsonPath GeoJSON 파일 경로(문자열)
+   * @param requestedMapSheetNums "요청한 도엽번호" 리스트 (중복/공백/NULL 포함 가능)
+   *     <p>동작 개요: 1) 파일 존재/크기 검증 2) 요청 도엽번호 목록 정리(Trim + 공백 제거 + 중복 제거) 3) GeoJSON 파싱 후 features 배열
+   *     확보 4) features에서 scene_id 추출하여 유니크 set 구성 5) requested vs found 비교: - missing: requested -
+   *     found - extra : found - requested - duplicates: GeoJSON 내부에서 scene_id 중복 등장 - nullIdCount:
+   *     scene_id가 null/blank 인 feature 개수 6) 이상 있으면 422로 실패 처리
+   */
  public static void validateWithRequested(String geojsonPath, List<String> requestedMapSheetNums) {

+    // 문자열 경로를 Path로 변환 (Files API 사용 목적)
    Path path = Path.of(geojsonPath);

+    // =========================================================
    // 1) 파일 기본 검증
+    //    - 파일이 존재하는지
+    //    - 파일 크기가 0인지(비어있으면 생성 실패/오류 가능성)
+    // =========================================================
    try {
+      // 파일 존재 여부 체크 (없으면 404)
      if (!Files.exists(path)) {
        throw new ResponseStatusException(
            HttpStatus.NOT_FOUND, "GeoJSON 파일이 존재하지 않습니다: " + geojsonPath);
      }
+
+      // 파일 사이즈 체크 (0 byte면 400)
      if (Files.size(path) == 0) {
        throw new ResponseStatusException(
            HttpStatus.BAD_REQUEST, "GeoJSON 파일이 비어있습니다: " + geojsonPath);
      }
    } catch (IOException e) {
+      // 파일 사이즈/상태 확인 중 IO 오류면 서버오류로 처리
      log.error("GeoJSON 파일 상태 확인 실패: path={}", path, e);
      throw new ResponseStatusException(
          HttpStatus.INTERNAL_SERVER_ERROR, "GeoJSON 파일 상태 확인 실패: " + geojsonPath, e);
    }

+    // =========================================================
+    // 2) 요청 도엽 리스트 유효성 검증
+    //    - 요청 목록 자체가 null/empty면 검증할 기준이 없으므로 400
+    // =========================================================
    if (requestedMapSheetNums == null || requestedMapSheetNums.isEmpty()) {
      throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "requestedMapSheetNums 가 비어있습니다.");
    }

-    // 2) 요청 도엽 Set (중복/공백 제거)
+    // =========================================================
+    // 2-1) 요청 도엽 Set 정리 (중복/공백/NULL 제거)
+    //    - null 제거
+    //    - trim 적용
+    //    - 빈 문자열 제거
+    //    - LinkedHashSet 사용: "중복 제거 + 원래 입력 순서 유지"
+    // =========================================================
    Set<String> requested =
        requestedMapSheetNums.stream()
-            .filter(Objects::nonNull)
-            .map(String::trim)
-            .filter(s -> !s.isEmpty())
-            .collect(Collectors.toCollection(LinkedHashSet::new));
+            .filter(Objects::nonNull) // null 제거
+            .map(String::trim) // 앞뒤 공백 제거
+            .filter(s -> !s.isEmpty()) // "" 제거
+            .collect(Collectors.toCollection(LinkedHashSet::new)); // 중복 제거 + 순서 유지

+    // 정리 결과가 비어있으면(전부 null/공백) 검증할 기준이 없으므로 400
    if (requested.isEmpty()) {
      throw new ResponseStatusException(
          HttpStatus.BAD_REQUEST, "requestedMapSheetNums 가 공백/NULL만 포함합니다.");
    }

+    // =========================================================
    // 3) GeoJSON 파싱
+    //    기대 구조:
+    //    {
+    //      "type": "FeatureCollection",
+    //      "features": [ ... ]
+    //    }
+    //
+    //    - features가 없거나 배열이 아니면 "유효하지 않은 GeoJSON" (400)
+    //    - 파일 읽기 IO 문제 및 JSON 문법 오류는 500 (Jackson 2.x의 JsonProcessingException은 IOException 하위 타입)
+    //    - 그 외 예상치 못한 런타임 예외는 400
+    // =========================================================
    final JsonNode features;
    try {
+      // JSON 파일을 트리 형태로 파싱
      JsonNode root = om.readTree(path.toFile());
+
+      // GeoJSON FeatureCollection의 핵심은 features 배열
      features = root.get("features");

+      // features가 없거나 배열이 아니면 GeoJSON 구조가 이상한 것
      if (features == null || !features.isArray()) {
        throw new ResponseStatusException(
            HttpStatus.BAD_REQUEST, "유효하지 않은 GeoJSON: features가 없거나 배열이 아닙니다.");
      }
    } catch (ResponseStatusException e) {
+      // 위에서 직접 던진 에러는 그대로 전달
      throw e;
    } catch (IOException e) {
+      // 읽기/파싱 과정에서 IO 문제가 터지면 서버오류
      log.error("GeoJSON 파일 읽기/파싱 실패: path={}", path, e);
      throw new ResponseStatusException(
          HttpStatus.INTERNAL_SERVER_ERROR, "GeoJSON 파일 읽기/파싱 실패: " + geojsonPath, e);
    } catch (Exception e) {
+      // IOException이 아닌 예상치 못한 예외는 400 처리 (주의: Jackson 2.x의 JSON 문법 오류는 IOException 하위 타입이라 위 catch에서 500으로 처리됨)
      log.error("GeoJSON 파싱 오류(비정상 JSON): path={}", path, e);
      throw new ResponseStatusException(
          HttpStatus.BAD_REQUEST, "GeoJSON 파싱 오류(비정상 JSON): " + geojsonPath, e);
    }

+    // =========================================================
    // 4) 검증 로직
+    //    - featureCount: 전체 feature 수 (중복 포함)
+    //    - foundUnique: GeoJSON에 등장한 유니크 scene_id 집합
+    //    - duplicates: GeoJSON 내부에서 scene_id가 중복된 목록(샘플 출력용)
+    //    - nullIdCount: scene_id가 없거나 빈 값인 feature 개수
+    // =========================================================
    int featureCount = features.size();

+    // 유니크 scene_id를 담는 Set (중복 판단을 위해 add 결과를 사용)
    Set<String> foundUnique = new HashSet<>();
+
+    // 중복된 scene_id 목록 (샘플 로그 출력용이라 순서 유지 가능한 LinkedHashSet 사용)
    Set<String> duplicates = new LinkedHashSet<>();
+
+    // scene_id가 null 또는 blank인 feature의 개수 (데이터 이상)
    int nullIdCount = 0;

+    // ---------------------------------------------------------
+    // features를 돌면서 feature.properties.scene_id를 추출한다.
+    //
+    // 기대 구조(일반적):
+    // features[i] = {
+    //   "type": "Feature",
+    //   "properties": {
+    //      "scene_id": "도엽번호"
+    //   },
+    //   "geometry": {...}
+    // }
+    // ---------------------------------------------------------
    for (JsonNode feature : features) {
      JsonNode props = feature.get("properties");
+
+      // properties가 있고 scene_id가 null이 아니면 텍스트로 읽음
+      // 없으면 null 처리
      String sceneId =
          (props != null && props.hasNonNull("scene_id")) ? props.get("scene_id").asText() : null;

+      // scene_id가 없거나 빈값이면 "정상적으로 도엽번호가 들어오지 않은 feature"로 카운트
      if (sceneId == null || sceneId.isBlank()) {
-        nullIdCount++;
+        nullIdCount++; // 도엽번호가 없으면 증가
        continue;
      }
+
+      // foundUnique.add(sceneId)가 false면 "이미 같은 값이 있었다"는 뜻 => 중복
      if (!foundUnique.add(sceneId)) {
        duplicates.add(sceneId);
      }
    }

-    // foundUnique에 있는 것들을 missing에서 제거
+    // =========================================================
+    // 4-1) requested vs found 비교(set 차집합)
+    //
+    // missing = requested - found
+    //   : 요청은 했는데 결과 GeoJSON에 없는 도엽번호
+    //
+    // extra = found - requested
+    //   : 요청하지 않았는데 결과 GeoJSON에 들어간 도엽번호
+    // =========================================================
+
+    // missing: requested를 복사한 뒤(foundUnique에 있는 값들을 제거) => 남은 것이 누락분
    Set<String> missing = new LinkedHashSet<>(requested);
    missing.removeAll(foundUnique);

-    // requested에 있는 것들을 extra에서 제거
+    // extra: foundUnique를 복사한 뒤(requested에 있는 값들을 제거) => 남은 것이 추가분
    Set<String> extra = new LinkedHashSet<>(foundUnique);
    extra.removeAll(requested);

-    // ================================================
-    // GeoJSON Validation
-    //
-    // 요청한 도엽번호(requested)와
-    // 실제 생성된 GeoJSON 파일의 scene_id를 비교하여
-    // 정합성(데이터 일치 여부)을 검증한다.
-    //
-    // 검증 항목:
-    // 1. features(total)            : GeoJSON 전체 feature 개수 (중복 포함)
-    // 2. requested(unique)          : 요청한 도엽번호 개수
-    // 3. found(unique scene_id)     : GeoJSON에서 실제 발견된 유니크 도엽 개수
-    // 4. scene_id null/blank        : scene_id가 없는 feature 개수 (데이터 이상)
-    // 5. duplicates(scene_id)       : 동일 도엽이 중복 생성된 개수
-    // 6. missing(requested - found) : 요청했지만 파일에 없는 도엽 개수
-    // 7. extra(found - requested)   : 요청하지 않았는데 파일에 포함된 도엽 개수
-    //
-    // 정상 기준:
-    // - missing = 0
-    // - extra = 0
-    // - duplicates = 0
-    // - nullId = 0
-    // - requested(unique) == found(unique scene_id)
-    //
-    // 위 조건을 만족하지 않으면 GeoJSON 생성 오류로 판단한다.
-    // ================================================
-
-    // 5) 로그
+    // =========================================================
+    // 5) 로그 출력
+    //    - 운영에서 문제 생겼을 때 "요청 vs 생성 결과"를 한 눈에 보게
+    //    - sample 로그는 너무 길어질 수 있으므로 limit 걸어줌
+    // =========================================================
    log.info(
        """
-      ===== GeoJSON Validation =====
-      file: {}
-      features(total): {}
-      requested(unique): {}
-      found(unique scene_id): {}
-      scene_id null/blank: {}
-      duplicates(scene_id): {}
-      missing(requested - found): {}
-      extra(found - requested): {}
-      ==============================
-      """,
+    ===== GeoJSON Validation =====
+    file: {}
+    features(total): {}
+    requested(unique): {}
+    found(unique scene_id): {}
+    scene_id null/blank: {}
+    duplicates(scene_id): {}
+    missing(requested - found): {}
+    extra(found - requested): {}
+    ==============================
+    """,
        geojsonPath,
-        featureCount,
-        requested.size(),
-        foundUnique.size(),
-        nullIdCount,
-        duplicates.size(),
-        missing.size(),
-        extra.size());
+        featureCount, // 중복 포함한 전체 feature 수
+        requested.size(), // 요청 도엽 유니크 수
+        foundUnique.size(), // GeoJSON에서 발견된 scene_id 유니크 수
+        nullIdCount, // scene_id가 비어있는 feature 수
+        duplicates.size(), // 중복 scene_id 종류 수
+        missing.size(), // 요청했지만 빠진 도엽 수
+        extra.size()); // 요청하지 않았는데 들어온 도엽 수

+    // 중복/누락/추가 항목은 전체를 다 찍으면 로그 폭발하므로 샘플만
    if (!duplicates.isEmpty())
      log.warn("duplicates sample: {}", duplicates.stream().limit(20).toList());
+
    if (!missing.isEmpty()) log.warn("missing sample: {}", missing.stream().limit(50).toList());
+
    if (!extra.isEmpty()) log.warn("extra sample: {}", extra.stream().limit(50).toList());

-    // 6) 실패면 422
+    // =========================================================
+    // 6) 실패 조건 판정
+    //
+    // 아래 중 하나라도 있으면 "요청 대비 결과 정합성이 깨졌다"로 보고 실패 처리(422):
+    // - missing 존재: 요청했는데 결과에 없음
+    // - extra 존재  : 요청 안했는데 결과에 있음
+    // - duplicates 존재: 동일 도엽이 중복 생성됨
+    // - nullIdCount > 0: scene_id가 비어있는 feature가 있음(데이터 이상)
+    //
+    // 422(Unprocessable Entity):
+    // - 요청 문법은 맞지만(파일은 있고 JSON도 읽힘),
+    //   내용(정합성)이 요구사항을 만족하지 못하는 경우에 적합.
+    // =========================================================
    if (!missing.isEmpty() || !extra.isEmpty() || !duplicates.isEmpty() || nullIdCount > 0) {
      throw new ResponseStatusException(
          HttpStatus.UNPROCESSABLE_ENTITY,
@@ -168,6 +258,7 @@ public class GeoJsonValidator {
              missing.size(), extra.size(), duplicates.size(), nullIdCount));
    }

+    // 모든 조건을 통과하면 정상
    log.info("GeoJSON validation OK");
  }
 }
--- a/src/main/java/com/kamco/cd/kamcoback/inference/service/InferenceResultService.java
+++ b/src/main/java/com/kamco/cd/kamcoback/inference/service/InferenceResultService.java
@@ -286,58 +286,72 @@ public class InferenceResultService {
      throw new CustomApiException("NOT_FOUND_COMPARE_YEAR", HttpStatus.NOT_FOUND);
    }

+    log.info("targetMngList size = {}", targetMngList.size());
+    log.info("compareMngList size = {}", compareMngList.size());
    log.info("Difference in count = {}", targetMngList.size() - compareMngList.size());

-    // 로그용 원본 카운트 (fallback 추가 전)
+    // 로그용 원본 카운트 (이전도엽 추가 전)
    int targetTotal = targetMngList.size();
    int compareTotalBeforeFallback = compareMngList.size();

-    // target - compare 구해서 이전년도로 compare 보완
-    List<String> compareNums0 =
-        compareMngList.stream().map(MngListDto::getMapSheetNum).filter(Objects::nonNull).toList();
+    // 비교연도(compare) 도엽번호만 Set으로 정리 (기준연도에만 있는 도엽을 찾아 이전연도로 compare를 보완하기 위함)
+    Set<String> compareSet0 =
+        compareMngList.stream()
+            .map(MngListDto::getMapSheetNum)
+            .filter(Objects::nonNull)
+            .collect(Collectors.toSet());

-    // 기준연도에 없는 도엽을 비교년도 이전 도엽에서 찾아서 추가하기
+    // 기준연도 기준 비교연도에 도엽번호가 없으면 이전연도 조회해서 compare 보완, 없는거 담기
    List<String> targetOnlyMapSheetNums =
        targetMngList.stream()
            .map(MngListDto::getMapSheetNum)
            .filter(Objects::nonNull)
-            .filter(num -> !compareNums0.contains(num))
+            .filter(num -> !compareSet0.contains(num))
            .toList();

-    // 이전년도(fallback) 추가
+    log.info("targetOnlyMapSheetNums in count = {}", targetOnlyMapSheetNums.size());
+
+    // 이전연도 조회 추가
    compareMngList.addAll(
        mapSheetMngCoreService.findFallbackCompareYearByMapSheets(
            req.getCompareYyyy(), targetOnlyMapSheetNums));

+    log.info("fallback compare size= {}", compareMngList.size());
+
    // 이전연도 추가 후 compare 총 개수
    int compareTotalAfterFallback = compareMngList.size();

-    // 교집합 도엽번호(mapSheetNums) 교집합 생성
-    List<String> compareNums1 =
-        compareMngList.stream().map(MngListDto::getMapSheetNum).filter(Objects::nonNull).toList();
+    // 이전연도 추가한 비교연도 값 도엽번호만 담기
+    Set<String> compareSet1 =
+        compareMngList.stream()
+            .map(MngListDto::getMapSheetNum)
+            .filter(Objects::nonNull)
+            .collect(Collectors.toSet());

-    // 기준연도 교집합
+    // 기준연도 기준으로 비교연도에 있는것만 담기 (도엽번호) 결국 비교연도와 개수가 같아짐
    List<String> mapSheetNums =
        targetMngList.stream()
            .map(MngListDto::getMapSheetNum)
            .filter(Objects::nonNull)
-            .filter(compareNums1::contains)
+            .filter(compareSet1::contains)
            .toList();

    int intersection = mapSheetNums.size();

-    // 서로 같은 것만 남기기: compare 모두 교집합
+    Set<String> intersectionSet = new HashSet<>(mapSheetNums);
+
+    // 비교연도 같은거 담기(dto list)
    compareMngList =
        compareMngList.stream()
            .filter(c -> c.getMapSheetNum() != null)
-            .filter(c -> mapSheetNums.contains(c.getMapSheetNum()))
+            .filter(c -> intersectionSet.contains(c.getMapSheetNum()))
            .toList();

-    // target도 교집합으로 줄이기
+    // 기준연도 같은거 담기(dto list)
    List<MngListDto> filteredTargetMngList =
        targetMngList.stream()
            .filter(t -> t.getMapSheetNum() != null)
-            .filter(t -> mapSheetNums.contains(t.getMapSheetNum()))
+            .filter(t -> intersectionSet.contains(t.getMapSheetNum()))
            .toList();

    // 로그