oom처리

This commit is contained in:
dean
2026-04-15 12:01:53 +09:00
parent 4fcc645f63
commit 0f7d794a38
5 changed files with 193 additions and 165 deletions

View File

@@ -4,14 +4,14 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## Project Overview ## Project Overview
Spring Boot 3.5.7 CLI application that converts PostgreSQL PostGIS spatial data to ESRI shapefiles and GeoJSON formats. The application uses **Spring Batch** for memory-efficient processing of large datasets (1M+ records) and supports automatic GeoServer layer registration via REST API. Spring Boot 3.5.7 / Java 21 CLI application that converts PostgreSQL PostGIS spatial data to ESRI shapefiles and GeoJSON formats. The application uses **Spring Batch** for memory-efficient processing of large datasets (1M+ records) and supports automatic GeoServer layer registration via REST API.
**Key Features**: **Key Features**:
- Memory-optimized batch processing (90-95% reduction: 2-13GB → 150-200MB) - Memory-optimized batch processing (90-95% reduction: 2-13GB → 150-200MB)
- Chunk-based streaming with cursor pagination (fetch-size: 1000) - Chunk-based streaming with cursor pagination (fetch-size: 1000)
- Automatic geometry validation and type conversion (MultiPolygon → Polygon) - Automatic geometry validation and type conversion (MultiPolygon → Polygon)
- Coordinate system validation (EPSG:5186 Korean 2000 / Central Belt) - Coordinate system validation (EPSG:5186 Korean 2000 / Central Belt)
- Dual execution modes: Spring Batch (recommended) and Legacy mode - Three execution modes: Spring Batch (recommended), Legacy, and GeoServer registration-only
## Build and Run Commands ## Build and Run Commands
@@ -25,6 +25,8 @@ Spring Boot 3.5.7 CLI application that converts PostgreSQL PostGIS spatial data
Output: `build/libs/shp-exporter.jar` (fixed name, no version suffix) Output: `build/libs/shp-exporter.jar` (fixed name, no version suffix)
> **Note**: The `Dockerfile` currently references `shp-exporter-v2.jar` in its `COPY` step, which does not match the actual build output. Update the Dockerfile if building a Docker image.
### Run Application ### Run Application
#### Spring Batch Mode (Recommended) #### Spring Batch Mode (Recommended)
@@ -113,6 +115,7 @@ ConverterCommandLineRunner
→ JdbcCursorItemReader (fetch-size: 1000) → JdbcCursorItemReader (fetch-size: 1000)
→ FeatureConversionProcessor (InferenceResult → SimpleFeature) → FeatureConversionProcessor (InferenceResult → SimpleFeature)
→ StreamingShapefileWriter (chunk-based append) → StreamingShapefileWriter (chunk-based append)
→ Step 2-1: PostShapefileUpdateTasklet (post-export DB UPDATE hook)
→ Step 3: generateGeoJsonStep (chunk-oriented, same pattern) → Step 3: generateGeoJsonStep (chunk-oriented, same pattern)
→ Step 4: CreateZipTasklet (creates .zip for GeoServer) → Step 4: CreateZipTasklet (creates .zip for GeoServer)
→ Step 5: GeoServerRegistrationTasklet (conditional, if --geoserver.enabled=true) → Step 5: GeoServerRegistrationTasklet (conditional, if --geoserver.enabled=true)
@@ -379,6 +382,21 @@ public Step myNewStep(JobRepository jobRepository,
``` ```
4. **Always include `BatchExecutionHistoryListener`** to track execution metrics 4. **Always include `BatchExecutionHistoryListener`** to track execution metrics
### Post-Export DB Hook (`PostShapefileUpdateTasklet`)
`PostShapefileUpdateTasklet` runs immediately after `generateShapefileStep` and is designed as a placeholder for running UPDATE SQL after shapefile export (e.g., marking rows as exported). The SQL body is intentionally left as a `// TODO` — add your UPDATE statement inside `execute()`:
```java
// batch/tasklet/PostShapefileUpdateTasklet.java
int updated = jdbcTemplate.update(
"UPDATE some_table SET status = 'EXPORTED' WHERE batch_id = ANY(?)",
ps -> {
ps.setArray(1, ps.getConnection().createArrayOf("bigint", batchIdList.toArray()));
});
```
Job parameters available: `inferenceId` (String), `batchIds` (comma-separated String → `List<Long>`).
### Modifying ItemReader Configuration ### Modifying ItemReader Configuration
ItemReaders are **not thread-safe**. Each step requires its own instance: ItemReaders are **not thread-safe**. Each step requires its own instance:

View File

@@ -30,7 +30,7 @@ ENV GEOSERVER_USERNAME=""
ENV GEOSERVER_PASSWORD="" ENV GEOSERVER_PASSWORD=""
ENTRYPOINT ["java", \ ENTRYPOINT ["java", \
"-Xmx4g", "-Xms512m", \ "-Xmx128g", "-Xms8g", \
"-XX:+UseG1GC", \ "-XX:+UseG1GC", \
"-XX:MaxGCPauseMillis=200", \ "-XX:MaxGCPauseMillis=200", \
"-XX:G1HeapRegionSize=16m", \ "-XX:G1HeapRegionSize=16m", \

View File

@@ -6,12 +6,15 @@ import com.kamco.makesample.batch.processor.FeatureConversionProcessor;
import com.kamco.makesample.batch.tasklet.CreateZipTasklet; import com.kamco.makesample.batch.tasklet.CreateZipTasklet;
import com.kamco.makesample.batch.tasklet.GeoServerRegistrationTasklet; import com.kamco.makesample.batch.tasklet.GeoServerRegistrationTasklet;
import com.kamco.makesample.batch.tasklet.GeometryTypeValidationTasklet; import com.kamco.makesample.batch.tasklet.GeometryTypeValidationTasklet;
import com.kamco.makesample.batch.tasklet.PostShapefileUpdateTasklet;
import com.kamco.makesample.batch.writer.MapIdGeoJsonWriter; import com.kamco.makesample.batch.writer.MapIdGeoJsonWriter;
import com.kamco.makesample.batch.writer.MapIdShapefileWriter; import com.kamco.makesample.batch.writer.MapIdShapefileWriter;
import com.kamco.makesample.batch.writer.StreamingGeoJsonWriter; import com.kamco.makesample.batch.writer.StreamingGeoJsonWriter;
import com.kamco.makesample.batch.writer.StreamingShapefileWriter; import com.kamco.makesample.batch.writer.StreamingShapefileWriter;
import com.kamco.makesample.model.InferenceResult; import com.kamco.makesample.model.InferenceResult;
import java.util.Arrays; import java.util.Arrays;
import org.geotools.api.feature.simple.SimpleFeature; import org.geotools.api.feature.simple.SimpleFeature;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -66,6 +69,7 @@ public class MergedModeJobConfig {
JobRepository jobRepository, JobRepository jobRepository,
Step validateGeometryTypeStep, Step validateGeometryTypeStep,
Step generateShapefileStep, Step generateShapefileStep,
Step postShapefileUpdateStep,
Step generateGeoJsonStep, Step generateGeoJsonStep,
Step createZipStep, Step createZipStep,
Step registerToGeoServerStep, Step registerToGeoServerStep,
@@ -78,6 +82,7 @@ public class MergedModeJobConfig {
.next(createZipStep) .next(createZipStep)
.next(registerToGeoServerStep) // Conditional execution .next(registerToGeoServerStep) // Conditional execution
.next(generateMapIdFilesStep) // Map ID별 개별 파일 생성 .next(generateMapIdFilesStep) // Map ID별 개별 파일 생성
.next(postShapefileUpdateStep) // Shapefile 생성 후 UPDATE 실행
.build(); .build();
} }
@@ -144,6 +149,28 @@ public class MergedModeJobConfig {
.build(); .build();
} }
/**
* Step 2-1: Shapefile 생성 후 UPDATE 실행
*
* @param jobRepository JobRepository
* @param transactionManager TransactionManager
* @param postShapefileUpdateTasklet PostShapefileUpdateTasklet
* @param historyListener BatchExecutionHistoryListener
* @return Step
*/
@Bean
public Step postShapefileUpdateStep(
JobRepository jobRepository,
PlatformTransactionManager transactionManager,
PostShapefileUpdateTasklet postShapefileUpdateTasklet,
BatchExecutionHistoryListener historyListener) {
return new StepBuilder("postShapefileUpdateStep", jobRepository)
.tasklet(postShapefileUpdateTasklet, transactionManager)
.listener(historyListener)
.build();
}
/** /**
* Step 3: GeoJSON 생성 (Chunk-oriented) * Step 3: GeoJSON 생성 (Chunk-oriented)
* *

View File

@@ -139,6 +139,8 @@ public class GeometryTypeValidationTasklet implements Tasklet {
SELECT COUNT(*) as valid_count SELECT COUNT(*) as valid_count
FROM inference_results_testing FROM inference_results_testing
WHERE batch_id = ANY(?) WHERE batch_id = ANY(?)
AND after_c IS NOT NULL
AND after_p IS NOT NULL
AND geometry IS NOT NULL AND geometry IS NOT NULL
AND ST_GeometryType(geometry) IN ('ST_Polygon', 'ST_MultiPolygon') AND ST_GeometryType(geometry) IN ('ST_Polygon', 'ST_MultiPolygon')
AND ST_SRID(geometry) = 5186 AND ST_SRID(geometry) = 5186

View File

@@ -10,13 +10,11 @@ import java.nio.file.Paths;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.geotools.api.data.SimpleFeatureStore; import org.geotools.api.data.FeatureWriter;
import org.geotools.api.data.Transaction; import org.geotools.api.data.Transaction;
import org.geotools.api.feature.simple.SimpleFeature; import org.geotools.api.feature.simple.SimpleFeature;
import org.geotools.api.feature.simple.SimpleFeatureType; import org.geotools.api.feature.simple.SimpleFeatureType;
import org.geotools.api.referencing.crs.CoordinateReferenceSystem; import org.geotools.api.referencing.crs.CoordinateReferenceSystem;
import org.geotools.data.DefaultTransaction;
import org.geotools.data.collection.ListFeatureCollection;
import org.geotools.data.shapefile.ShapefileDataStore; import org.geotools.data.shapefile.ShapefileDataStore;
import org.geotools.data.shapefile.ShapefileDataStoreFactory; import org.geotools.data.shapefile.ShapefileDataStoreFactory;
import org.slf4j.Logger; import org.slf4j.Logger;
@@ -69,8 +67,7 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
private String outputPath; private String outputPath;
private ShapefileDataStore dataStore; private ShapefileDataStore dataStore;
private Transaction transaction; private FeatureWriter<SimpleFeatureType, SimpleFeature> featureWriter;
private SimpleFeatureStore featureStore;
private SimpleFeatureType featureType; private SimpleFeatureType featureType;
private int chunkCount = 0; private int chunkCount = 0;
@@ -145,13 +142,9 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
dataStore = (ShapefileDataStore) factory.createNewDataStore(params); dataStore = (ShapefileDataStore) factory.createNewDataStore(params);
dataStore.createSchema(featureType); dataStore.createSchema(featureType);
// Transaction 시작 // FeatureWriter를 append 모드로 직접 열기 (Diff 누적 없이 파일에 직접 씀)
transaction = new DefaultTransaction("create");
// FeatureStore 가져오기
String typeName = dataStore.getTypeNames()[0]; String typeName = dataStore.getTypeNames()[0];
featureStore = (SimpleFeatureStore) dataStore.getFeatureSource(typeName); featureWriter = dataStore.getFeatureWriterAppend(typeName, Transaction.AUTO_COMMIT);
featureStore.setTransaction(transaction);
startTimeMs = System.currentTimeMillis(); startTimeMs = System.currentTimeMillis();
log.info("ShapefileDataStore initialized successfully"); log.info("ShapefileDataStore initialized successfully");
@@ -172,10 +165,13 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
int itemCount = items.size(); int itemCount = items.size();
totalRecordCount += itemCount; totalRecordCount += itemCount;
// FeatureStore에 추가 - GeoTools ShapefileDataStore는 Diff 없이 파일에 직접 씀 // FeatureWriter로 직접 append - Diff 누적 없이 O(1) per record
// 트랜잭션은 afterStep()에서 단일 커밋 (per-chunk 커밋 시 setTransaction()이 .shx 재스캔 → O(n²)) for (SimpleFeature feature : items) {
ListFeatureCollection collection = new ListFeatureCollection(featureType, items); SimpleFeature newFeature = featureWriter.next();
featureStore.addFeatures(collection); newFeature.setAttributes(feature.getAttributes());
newFeature.setDefaultGeometry(feature.getDefaultGeometry());
featureWriter.write();
}
if (chunkCount % LOG_INTERVAL_CHUNKS == 0) { if (chunkCount % LOG_INTERVAL_CHUNKS == 0) {
logProgress(); logProgress();
@@ -191,15 +187,10 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
chunkCount); chunkCount);
try { try {
if (transaction != null) {
transaction.commit();
log.info("Final transaction committed successfully");
}
} catch (IOException e) {
log.error("Failed to commit final transaction", e);
throw new ItemStreamException("Failed to commit shapefile transaction", e);
} finally {
cleanup(); cleanup();
} catch (Exception e) {
log.error("Failed to close shapefile writer", e);
throw new ItemStreamException("Failed to close shapefile writer", e);
} }
} }
@@ -212,11 +203,7 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
public void onError(Exception exception, Chunk<? extends SimpleFeature> chunk) { public void onError(Exception exception, Chunk<? extends SimpleFeature> chunk) {
log.error("Error writing chunk #{}: {}", chunkCount, exception.getMessage(), exception); log.error("Error writing chunk #{}: {}", chunkCount, exception.getMessage(), exception);
try { cleanup();
if (transaction != null) {
transaction.rollback();
log.info("Transaction rolled back due to error");
}
// 부분 파일 삭제 // 부분 파일 삭제
File shpFile = new File(outputPath); File shpFile = new File(outputPath);
@@ -224,12 +211,6 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
shpFile.delete(); shpFile.delete();
log.info("Deleted partial shapefile: {}", outputPath); log.info("Deleted partial shapefile: {}", outputPath);
} }
} catch (IOException e) {
log.error("Failed to rollback transaction", e);
} finally {
cleanup();
}
} }
private void logProgress() { private void logProgress() {
@@ -264,13 +245,13 @@ public class StreamingShapefileWriter implements ItemStreamWriter<SimpleFeature>
} }
private void cleanup() { private void cleanup() {
if (transaction != null) { if (featureWriter != null) {
try { try {
transaction.close(); featureWriter.close();
} catch (IOException e) { } catch (IOException e) {
log.warn("Failed to close transaction", e); log.warn("Failed to close feature writer", e);
} }
transaction = null; featureWriter = null;
} }
if (dataStore != null) { if (dataStore != null) {