oom처리

2026-04-15 12:01:53 +09:00
parent 4fcc645f63
commit 0f7d794a38
5 changed files with 193 additions and 165 deletions
--- a/shp-exporter/CLAUDE.md
+++ b/shp-exporter/CLAUDE.md
@@ -4,14 +4,14 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

 ## Project Overview

-Spring Boot 3.5.7 CLI application that converts PostgreSQL PostGIS spatial data to ESRI shapefiles and GeoJSON formats. The application uses **Spring Batch** for memory-efficient processing of large datasets (1M+ records) and supports automatic GeoServer layer registration via REST API.
+Spring Boot 3.5.7 / Java 21 CLI application that converts PostgreSQL PostGIS spatial data to ESRI shapefiles and GeoJSON formats. The application uses **Spring Batch** for memory-efficient processing of large datasets (1M+ records) and supports automatic GeoServer layer registration via REST API.

 **Key Features**:
 - Memory-optimized batch processing (90-95% reduction: 2-13GB → 150-200MB)
 - Chunk-based streaming with cursor pagination (fetch-size: 1000)
 - Automatic geometry validation and type conversion (MultiPolygon → Polygon)
 - Coordinate system validation (EPSG:5186 Korean 2000 / Central Belt)
- Dual execution modes: Spring Batch (recommended) and Legacy mode
+- Three execution modes: Spring Batch (recommended), Legacy, and GeoServer registration-only

 ## Build and Run Commands

@@ -25,6 +25,8 @@ Spring Boot 3.5.7 CLI application that converts PostgreSQL PostGIS spatial data

 Output: `build/libs/shp-exporter.jar` (fixed name, no version suffix)

+> **Note**: The `Dockerfile` currently references `shp-exporter-v2.jar` in its `COPY` step, which does not match the actual build output (`shp-exporter.jar`). Change the `COPY` source to `shp-exporter.jar` before building a Docker image; otherwise the image build will not find the jar.
+
 ### Run Application

 #### Spring Batch Mode (Recommended)
@@ -113,6 +115,7 @@ ConverterCommandLineRunner
        → JdbcCursorItemReader (fetch-size: 1000)
        → FeatureConversionProcessor (InferenceResult → SimpleFeature)
        → StreamingShapefileWriter (chunk-based append)
+    → Step 2-1: PostShapefileUpdateTasklet (post-export DB UPDATE hook)
    → Step 3: generateGeoJsonStep (chunk-oriented, same pattern)
    → Step 4: CreateZipTasklet (creates .zip for GeoServer)
    → Step 5: GeoServerRegistrationTasklet (conditional, if --geoserver.enabled=true)
@@ -379,6 +382,21 @@ public Step myNewStep(JobRepository jobRepository,
 ```
 4. **Always include `BatchExecutionHistoryListener`** to track execution metrics

+### Post-Export DB Hook (`PostShapefileUpdateTasklet`)
+
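+`PostShapefileUpdateTasklet` is a placeholder hook for running UPDATE SQL once the shapefile has been exported (e.g., marking rows as exported). Although it is labelled "Step 2-1", `MergedModeJobConfig` currently chains `postShapefileUpdateStep` as the last step of the job (after `generateMapIdFilesStep`), not directly after `generateShapefileStep`. The SQL body is intentionally left as a `// TODO` — add your UPDATE statement inside `execute()`: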
+
+```java
+// batch/tasklet/PostShapefileUpdateTasklet.java
+int updated = jdbcTemplate.update(
+    "UPDATE some_table SET status = 'EXPORTED' WHERE batch_id = ANY(?)",
+    ps -> {
+      ps.setArray(1, ps.getConnection().createArrayOf("bigint", batchIdList.toArray()));
+    });
+```
+
+Job parameters available: `inferenceId` (String), `batchIds` (comma-separated String → `List<Long>`).
+
 ### Modifying ItemReader Configuration

 ItemReaders are **not thread-safe**. Each step requires its own instance:
--- a/shp-exporter/Dockerfile
+++ b/shp-exporter/Dockerfile
@@ -30,7 +30,7 @@ ENV GEOSERVER_USERNAME=""
 ENV GEOSERVER_PASSWORD=""

 ENTRYPOINT ["java", \
-  "-Xmx4g", "-Xms512m", \
+  "-Xmx128g", "-Xms8g", \
  "-XX:+UseG1GC", \
  "-XX:MaxGCPauseMillis=200", \
  "-XX:G1HeapRegionSize=16m", \
--- a/shp-exporter/src/main/java/com/kamco/makesample/batch/config/MergedModeJobConfig.java
+++ b/shp-exporter/src/main/java/com/kamco/makesample/batch/config/MergedModeJobConfig.java
@@ -6,12 +6,15 @@ import com.kamco.makesample.batch.processor.FeatureConversionProcessor;
 import com.kamco.makesample.batch.tasklet.CreateZipTasklet;
 import com.kamco.makesample.batch.tasklet.GeoServerRegistrationTasklet;
 import com.kamco.makesample.batch.tasklet.GeometryTypeValidationTasklet;
+import com.kamco.makesample.batch.tasklet.PostShapefileUpdateTasklet;
 import com.kamco.makesample.batch.writer.MapIdGeoJsonWriter;
 import com.kamco.makesample.batch.writer.MapIdShapefileWriter;
 import com.kamco.makesample.batch.writer.StreamingGeoJsonWriter;
 import com.kamco.makesample.batch.writer.StreamingShapefileWriter;
 import com.kamco.makesample.model.InferenceResult;
+
 import java.util.Arrays;
+
 import org.geotools.api.feature.simple.SimpleFeature;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -66,6 +69,7 @@ public class MergedModeJobConfig {
    JobRepository jobRepository,
    Step validateGeometryTypeStep,
    Step generateShapefileStep,
+    Step postShapefileUpdateStep,
    Step generateGeoJsonStep,
    Step createZipStep,
    Step registerToGeoServerStep,
@@ -78,6 +82,7 @@ public class MergedModeJobConfig {
      .next(createZipStep)
      .next(registerToGeoServerStep) // Conditional execution
      .next(generateMapIdFilesStep) // Map ID별 개별 파일 생성
+      .next(postShapefileUpdateStep) // Shapefile 생성 후 UPDATE 실행
      .build();
  }

@@ -144,6 +149,28 @@ public class MergedModeJobConfig {
      .build();
  }

+  /**
+   * Step 2-1: runs the UPDATE that follows shapefile generation, as a tasklet step.
+   * NOTE(review): the job definition appears to chain this step last, after generateMapIdFilesStep - confirm the "2-1" label.
+   * @param jobRepository              JobRepository passed to the StepBuilder
+   * @param transactionManager         TransactionManager the tasklet is registered with
+   * @param postShapefileUpdateTasklet PostShapefileUpdateTasklet executed by this step
+   * @param historyListener            BatchExecutionHistoryListener registered on the step
+   * @return Step named "postShapefileUpdateStep"
+   */
+  @Bean
+  public Step postShapefileUpdateStep(
+    JobRepository jobRepository,
+    PlatformTransactionManager transactionManager,
+    PostShapefileUpdateTasklet postShapefileUpdateTasklet,
+    BatchExecutionHistoryListener historyListener) {
+
+    return new StepBuilder("postShapefileUpdateStep", jobRepository)
+      .tasklet(postShapefileUpdateTasklet, transactionManager)
+      .listener(historyListener)
+      .build();
+  }
+
  /**
   * Step 3: GeoJSON 생성 (Chunk-oriented)
   *
--- a/shp-exporter/src/main/java/com/kamco/makesample/batch/tasklet/GeometryTypeValidationTasklet.java
+++ b/shp-exporter/src/main/java/com/kamco/makesample/batch/tasklet/GeometryTypeValidationTasklet.java
@@ -139,6 +139,8 @@ public class GeometryTypeValidationTasklet implements Tasklet {
        SELECT COUNT(*) as valid_count
        FROM inference_results_testing
        WHERE batch_id = ANY(?)
+          AND after_c IS NOT NULL
+          AND after_p IS NOT NULL
          AND geometry IS NOT NULL
          AND ST_GeometryType(geometry) IN ('ST_Polygon', 'ST_MultiPolygon')
          AND ST_SRID(geometry) = 5186
--- a/shp-exporter/src/main/java/com/kamco/makesample/batch/writer/StreamingShapefileWriter.java
+++ b/shp-exporter/src/main/java/com/kamco/makesample/batch/writer/StreamingShapefileWriter.java
@@ -10,13 +10,11 @@ import java.nio.file.Paths;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import org.geotools.api.data.SimpleFeatureStore;
+import org.geotools.api.data.FeatureWriter;
 import org.geotools.api.data.Transaction;
 import org.geotools.api.feature.simple.SimpleFeature;
 import org.geotools.api.feature.simple.SimpleFeatureType;
 import org.geotools.api.referencing.crs.CoordinateReferenceSystem;
-import org.geotools.data.DefaultTransaction;
-import org.geotools.data.collection.ListFeatureCollection;
 import org.geotools.data.shapefile.ShapefileDataStore;
 import org.geotools.data.shapefile.ShapefileDataStoreFactory;
 import org.slf4j.Logger;
@@ -69,8 +67,7 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
  private String outputPath;

  private ShapefileDataStore dataStore;
-  private Transaction transaction;
-  private SimpleFeatureStore featureStore;
+  private FeatureWriter<SimpleFeatureType, SimpleFeature> featureWriter;
  private SimpleFeatureType featureType;

  private int chunkCount = 0;
@@ -145,13 +142,9 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
      dataStore = (ShapefileDataStore) factory.createNewDataStore(params);
      dataStore.createSchema(featureType);

-      // Transaction 시작
-      transaction = new DefaultTransaction("create");
-
-      // FeatureStore 가져오기
+      // FeatureWriter를 append 모드로 직접 열기 (Diff 누적 없이 파일에 직접 씀)
      String typeName = dataStore.getTypeNames()[0];
-      featureStore = (SimpleFeatureStore) dataStore.getFeatureSource(typeName);
-      featureStore.setTransaction(transaction);
+      featureWriter = dataStore.getFeatureWriterAppend(typeName, Transaction.AUTO_COMMIT);

      startTimeMs = System.currentTimeMillis();
      log.info("ShapefileDataStore initialized successfully");
@@ -172,10 +165,13 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
    int itemCount = items.size();
    totalRecordCount += itemCount;

-    // FeatureStore에 추가 - GeoTools ShapefileDataStore는 Diff 없이 파일에 직접 씀
-    // 트랜잭션은 afterStep()에서 단일 커밋 (per-chunk 커밋 시 setTransaction()이 .shx 재스캔 → O(n²))
-    ListFeatureCollection collection = new ListFeatureCollection(featureType, items);
-    featureStore.addFeatures(collection);
+    // FeatureWriter로 직접 append - Diff 누적 없이 O(1) per record
+    for (SimpleFeature feature : items) {
+      SimpleFeature newFeature = featureWriter.next();
+      newFeature.setAttributes(feature.getAttributes());
+      newFeature.setDefaultGeometry(feature.getDefaultGeometry());
+      featureWriter.write();
+    }

    if (chunkCount % LOG_INTERVAL_CHUNKS == 0) {
      logProgress();
@@ -191,15 +187,10 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
        chunkCount);

    try {
-      if (transaction != null) {
-        transaction.commit();
-        log.info("Final transaction committed successfully");
-      }
-    } catch (IOException e) {
-      log.error("Failed to commit final transaction", e);
-      throw new ItemStreamException("Failed to commit shapefile transaction", e);
-    } finally {
      cleanup();
+    } catch (Exception e) {
+      log.error("Failed to close shapefile writer", e);
+      throw new ItemStreamException("Failed to close shapefile writer", e);
    }
  }

@@ -212,11 +203,7 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
  public void onError(Exception exception, Chunk<? extends SimpleFeature> chunk) {
    log.error("Error writing chunk #{}: {}", chunkCount, exception.getMessage(), exception);

-    try {
-      if (transaction != null) {
-        transaction.rollback();
-        log.info("Transaction rolled back due to error");
-      }
+    cleanup();

    // 부분 파일 삭제
    File shpFile = new File(outputPath);
@@ -224,12 +211,6 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
      shpFile.delete();
      log.info("Deleted partial shapefile: {}", outputPath);
    }
-
-    } catch (IOException e) {
-      log.error("Failed to rollback transaction", e);
-    } finally {
-      cleanup();
-    }
  }

  private void logProgress() {
@@ -264,13 +245,13 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
  }

  private void cleanup() {
-    if (transaction != null) {
+    if (featureWriter != null) {
      try {
-        transaction.close();
+        featureWriter.close();
      } catch (IOException e) {
-        log.warn("Failed to close transaction", e);
+        log.warn("Failed to close feature writer", e);
      }
-      transaction = null;
+      featureWriter = null;
    }

    if (dataStore != null) {