diff --git a/shp-exporter/.gitignore b/shp-exporter/.gitignore index e905136..cc75f05 100644 --- a/shp-exporter/.gitignore +++ b/shp-exporter/.gitignore @@ -76,3 +76,11 @@ docker-compose.override.yml *.jar *.class /build + +### Output directories ### +export/ +merge/ + +### Documentation (temporary) ### +claudedocs/ + diff --git a/shp-exporter/.gradle/8.14.3/checksums/checksums.lock b/shp-exporter/.gradle/8.14.3/checksums/checksums.lock deleted file mode 100755 index 79e108c..0000000 Binary files a/shp-exporter/.gradle/8.14.3/checksums/checksums.lock and /dev/null differ diff --git a/shp-exporter/.gradle/8.14.3/checksums/md5-checksums.bin b/shp-exporter/.gradle/8.14.3/checksums/md5-checksums.bin deleted file mode 100755 index 2389ef5..0000000 Binary files a/shp-exporter/.gradle/8.14.3/checksums/md5-checksums.bin and /dev/null differ diff --git a/shp-exporter/.gradle/8.14.3/checksums/sha1-checksums.bin b/shp-exporter/.gradle/8.14.3/checksums/sha1-checksums.bin deleted file mode 100755 index 1eb2faf..0000000 Binary files a/shp-exporter/.gradle/8.14.3/checksums/sha1-checksums.bin and /dev/null differ diff --git a/shp-exporter/.gradle/8.14.3/executionHistory/executionHistory.bin b/shp-exporter/.gradle/8.14.3/executionHistory/executionHistory.bin deleted file mode 100755 index 2af6470..0000000 Binary files a/shp-exporter/.gradle/8.14.3/executionHistory/executionHistory.bin and /dev/null differ diff --git a/shp-exporter/.gradle/8.14.3/executionHistory/executionHistory.lock b/shp-exporter/.gradle/8.14.3/executionHistory/executionHistory.lock deleted file mode 100755 index a339036..0000000 Binary files a/shp-exporter/.gradle/8.14.3/executionHistory/executionHistory.lock and /dev/null differ diff --git a/shp-exporter/.gradle/8.14.3/fileChanges/last-build.bin b/shp-exporter/.gradle/8.14.3/fileChanges/last-build.bin deleted file mode 100755 index f76dd23..0000000 Binary files a/shp-exporter/.gradle/8.14.3/fileChanges/last-build.bin and /dev/null differ diff --git 
a/shp-exporter/.gradle/8.14.3/fileHashes/fileHashes.bin b/shp-exporter/.gradle/8.14.3/fileHashes/fileHashes.bin deleted file mode 100755 index 069f5fb..0000000 Binary files a/shp-exporter/.gradle/8.14.3/fileHashes/fileHashes.bin and /dev/null differ diff --git a/shp-exporter/.gradle/8.14.3/fileHashes/fileHashes.lock b/shp-exporter/.gradle/8.14.3/fileHashes/fileHashes.lock deleted file mode 100755 index 2af1ae6..0000000 Binary files a/shp-exporter/.gradle/8.14.3/fileHashes/fileHashes.lock and /dev/null differ diff --git a/shp-exporter/.gradle/8.14.3/fileHashes/resourceHashesCache.bin b/shp-exporter/.gradle/8.14.3/fileHashes/resourceHashesCache.bin deleted file mode 100755 index 8261caf..0000000 Binary files a/shp-exporter/.gradle/8.14.3/fileHashes/resourceHashesCache.bin and /dev/null differ diff --git a/shp-exporter/.gradle/8.14.3/gc.properties b/shp-exporter/.gradle/8.14.3/gc.properties deleted file mode 100644 index e69de29..0000000 diff --git a/shp-exporter/.gradle/buildOutputCleanup/buildOutputCleanup.lock b/shp-exporter/.gradle/buildOutputCleanup/buildOutputCleanup.lock deleted file mode 100755 index b1a64c9..0000000 Binary files a/shp-exporter/.gradle/buildOutputCleanup/buildOutputCleanup.lock and /dev/null differ diff --git a/shp-exporter/.gradle/buildOutputCleanup/cache.properties b/shp-exporter/.gradle/buildOutputCleanup/cache.properties deleted file mode 100755 index 2ac9937..0000000 --- a/shp-exporter/.gradle/buildOutputCleanup/cache.properties +++ /dev/null @@ -1,2 +0,0 @@ -#Wed Jan 14 15:14:03 KST 2026 -gradle.version=8.14.3 diff --git a/shp-exporter/.gradle/buildOutputCleanup/outputFiles.bin b/shp-exporter/.gradle/buildOutputCleanup/outputFiles.bin deleted file mode 100755 index 50c5eef..0000000 Binary files a/shp-exporter/.gradle/buildOutputCleanup/outputFiles.bin and /dev/null differ diff --git a/shp-exporter/.gradle/file-system.probe b/shp-exporter/.gradle/file-system.probe deleted file mode 100755 index 8399743..0000000 Binary files 
a/shp-exporter/.gradle/file-system.probe and /dev/null differ diff --git a/shp-exporter/.gradle/vcs-1/gc.properties b/shp-exporter/.gradle/vcs-1/gc.properties deleted file mode 100644 index e69de29..0000000 diff --git a/shp-exporter/.idea/.gitignore b/shp-exporter/.idea/.gitignore deleted file mode 100755 index 13566b8..0000000 --- a/shp-exporter/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/shp-exporter/.idea/compiler.xml b/shp-exporter/.idea/compiler.xml deleted file mode 100755 index e5a8359..0000000 --- a/shp-exporter/.idea/compiler.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/shp-exporter/.idea/gradle.xml b/shp-exporter/.idea/gradle.xml deleted file mode 100755 index b06d5a2..0000000 --- a/shp-exporter/.idea/gradle.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/shp-exporter/.idea/jarRepositories.xml b/shp-exporter/.idea/jarRepositories.xml deleted file mode 100755 index ef0f9ab..0000000 --- a/shp-exporter/.idea/jarRepositories.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/shp-exporter/.idea/makesample_geoserver.iml b/shp-exporter/.idea/makesample_geoserver.iml deleted file mode 100755 index d6ebd48..0000000 --- a/shp-exporter/.idea/makesample_geoserver.iml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/shp-exporter/.idea/misc.xml b/shp-exporter/.idea/misc.xml deleted file mode 100755 index fe0b0da..0000000 --- a/shp-exporter/.idea/misc.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/shp-exporter/.idea/vcs.xml b/shp-exporter/.idea/vcs.xml deleted file mode 100644 index 6c0b863..0000000 --- a/shp-exporter/.idea/vcs.xml +++ 
/dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/shp-exporter/CLAUDE.md b/shp-exporter/CLAUDE.md index bad6f2e..e445afd 100755 --- a/shp-exporter/CLAUDE.md +++ b/shp-exporter/CLAUDE.md @@ -4,27 +4,51 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -Spring Boot CLI application that queries PostgreSQL PostGIS spatial data and converts it to ESRI shapefiles and GeoJSON. The application processes AI inference results from the KAMCO database and generates geographic data files for visualization in GIS applications. It also supports automatic registration of shapefiles to GeoServer via REST API. +Spring Boot 3.5.7 CLI application that converts PostgreSQL PostGIS spatial data to ESRI shapefiles and GeoJSON formats. The application uses **Spring Batch** for memory-efficient processing of large datasets (1M+ records) and supports automatic GeoServer layer registration via REST API. + +**Key Features**: +- Memory-optimized batch processing (90-95% reduction: 2-13GB → 150-200MB) +- Chunk-based streaming with cursor pagination (fetch-size: 1000) +- Automatic geometry validation and type conversion (MultiPolygon → Polygon) +- Coordinate system validation (EPSG:5186 Korean 2000 / Central Belt) +- Dual execution modes: Spring Batch (recommended) and Legacy mode ## Build and Run Commands ### Build ```bash -./gradlew build +./gradlew build # Full build with tests +./gradlew clean build -x test # Skip tests +./gradlew spotlessApply # Apply Google Java Format (2-space indentation) +./gradlew spotlessCheck # Verify formatting without applying ``` -The built JAR will be named `shp-exporter.jar` (configured in `bootJar` task). 
+Output: `build/libs/shp-exporter.jar` (fixed name, no version suffix) ### Run Application -#### Generate Shapefiles +#### Spring Batch Mode (Recommended) ```bash -./gradlew bootRun +# Generate shapefile + GeoJSON +./gradlew bootRun --args="--batch --converter.batch-ids[0]=252" + +# With GeoServer registration +export GEOSERVER_USERNAME=admin +export GEOSERVER_PASSWORD=geoserver +./gradlew bootRun --args="--batch --geoserver.enabled=true --converter.batch-ids[0]=252" + +# Using JAR (production) +java -jar build/libs/shp-exporter.jar \ + --batch \ + --converter.inference-id=D5E46F60FC40B1A8BE0CD1F3547AA6 \ + --converter.batch-ids[0]=252 \ + --converter.batch-ids[1]=253 ``` -Or using JAR: +#### Legacy Mode (Small Datasets Only) ```bash -java -jar build/libs/shp-exporter.jar +./gradlew bootRun # No --batch flag +# Warning: May OOM on large datasets ``` #### Upload Shapefile to GeoServer @@ -71,122 +95,410 @@ By default, the application runs with `spring.profiles.active=prod` (set in `app ## Architecture -### Processing Pipeline -The application follows a layered architecture with a linear data flow: +### Dual Execution Modes -1. **CLI Entry** (`ConverterCommandLineRunner`) → Parses command-line args and routes to either shapefile generation or GeoServer upload -2. **Service Orchestration** (`ShapefileConverterService`) → Coordinates the conversion workflow based on mode (MERGED, MAP_IDS, or RESOLVE) -3. **Data Access** (`InferenceResultRepository`) → Queries PostGIS database using `PreparedStatementCreator` for PostgreSQL array parameters -4. **Geometry Conversion** (`GeometryConverter`) → Converts PostGIS WKT format to JTS Geometry objects using `WKTReader` -5. **File Writing** (`ShapefileWriter`, `GeoJsonWriter`, `ResultZipWriter`) → Generates output files using GeoTools -6. 
**GeoServer Integration** (`GeoServerRegistrationService`) → Registers shapefiles to GeoServer via REST API (optional) +The application supports two execution modes with distinct processing pipelines: -### Key Design Points +#### Spring Batch Mode (Recommended) +**Trigger**: `--batch` flag +**Use Case**: Large datasets (100K+ records), production workloads +**Memory**: 150-200MB constant (chunk-based streaming) -**Conversion Modes**: The application supports three execution modes controlled by `converter.mode`: -- `MERGED`: Creates a single shapefile for all data matching `batch-ids` (ignores `map-ids`) -- `MAP_IDS`: Processes only the `map-ids` specified in configuration (requires `map-ids` to be set) -- `RESOLVE`: Queries the database for all distinct `map-ids` matching `batch-ids`, then processes each (avoids OS command-line length limits) -- If `mode` is unspecified: defaults to `MERGED` if `map-ids` is empty, otherwise `MAP_IDS` - -**Geometry Handling**: Two-step conversion process: -- PostGIS returns geometries as WKT (Well-Known Text) via `ST_AsText(geometry)` in SQL query -- `GeometryConverter` parses WKT to JTS `Geometry` objects using `WKTReader` -- `ShapefileWriter` uses JTS geometries with GeoTools to write shapefile artifacts (.shp, .shx, .dbf, .prj) - -**Shapefile Constraints**: -- Validates all geometries are homogeneous (same type) via `ShapefileConverterService.validateGeometries()` -- Shapefiles cannot contain mixed geometry types (e.g., cannot mix Polygon and Point) -- Geometry type determined from first valid geometry in result set - -**Output Structure**: -- For MAP_IDS/RESOLVE mode: `{output-base-dir}/{inference-id}/{map-id}/` -- For MERGED mode: `{output-base-dir}/{inference-id}/merge/` -- Each directory contains: `.shp`, `.shx`, `.dbf`, `.prj`, `.geojson`, and `.zip` files - -**PostgreSQL Array Parameters**: The repository uses `PreparedStatementCreator` to handle PostgreSQL array syntax: -```java -Array batchIdsArray = 
con.createArrayOf("bigint", batchIds.toArray()); -ps.setArray(1, batchIdsArray); +**Pipeline Flow**: +``` +ConverterCommandLineRunner + → JobLauncher.run(mergedModeJob) + → Step 1: GeometryTypeValidationTasklet (validates geometry homogeneity) + → Step 2: generateShapefileStep (chunk-oriented) + → JdbcCursorItemReader (fetch-size: 1000) + → FeatureConversionProcessor (InferenceResult → SimpleFeature) + → StreamingShapefileWriter (chunk-based append) + → Step 3: generateGeoJsonStep (chunk-oriented, same pattern) + → Step 4: CreateZipTasklet (creates .zip for GeoServer) + → Step 5: GeoServerRegistrationTasklet (conditional, if --geoserver.enabled=true) + → Step 6: generateMapIdFilesStep (partitioned, sequential map_id processing) ``` -This enables `WHERE batch_id = ANY(?)` queries. -**GeoServer Integration**: -- Workspace 'cd' must be pre-created in GeoServer before registration -- Uses environment variables `GEOSERVER_USERNAME` and `GEOSERVER_PASSWORD` for authentication -- Supports automatic deletion and re-registration when `overwrite-existing: true` -- Non-blocking: registration failures are logged but don't stop the application +**Key Components**: +- `JdbcCursorItemReader`: Cursor-based streaming (no full result set loading) +- `StreamingShapefileWriter`: Opens GeoTools transaction, writes chunks incrementally, commits at end +- `GeometryTypeValidationTasklet`: Pre-validates with SQL `DISTINCT ST_GeometryType()`, auto-converts MultiPolygon +- `CompositeItemWriter`: Simultaneously writes shapefile and GeoJSON in map_id worker step + +#### Legacy Mode +**Trigger**: No `--batch` flag (deprecated) +**Use Case**: Small datasets (<10K records) +**Memory**: 1.4-9GB (loads entire result set) + +**Pipeline Flow**: +``` +ConverterCommandLineRunner + → ShapefileConverterService.convertAll() + → InferenceResultRepository.findByBatchIds() (full List) + → validateGeometries() (in-memory validation) + → ShapefileWriter.write() (DefaultFeatureCollection accumulation) + → 
GeoJsonWriter.write() +``` + +### Key Design Patterns + +**Geometry Type Validation & Auto-Conversion**: +- Pre-validation step runs SQL `SELECT DISTINCT ST_GeometryType(geometry)` to detect mixed types +- Supports automatic conversion: `ST_MultiPolygon` → `ST_Polygon` (extracts first polygon only) +- Fails fast on unsupported mixed types (e.g., Polygon + LineString) +- Validates EPSG:5186 coordinate bounds (X: 125-530km, Y: -600-988km) and ST_IsValid() +- See `GeometryTypeValidationTasklet` (batch/tasklet/GeometryTypeValidationTasklet.java:1-290) + +**WKT to JTS Conversion Pipeline**: +1. PostGIS query returns `ST_AsText(geometry)` as WKT string +2. `GeometryConvertingRowMapper` converts ResultSet row to `InferenceResult` with WKT string (batch/reader/GeometryConvertingRowMapper.java:1-74) +3. `FeatureConversionProcessor` uses `GeometryConverter.parseGeometry()` to convert WKT → JTS Geometry (service/GeometryConverter.java:1-92) +4. `StreamingShapefileWriter` wraps JTS geometry in GeoTools `SimpleFeature` and writes to shapefile + +**Chunk-Based Transaction Management** (Spring Batch only): +```java +// StreamingShapefileWriter +@BeforeStep +public void open() { + transaction = new DefaultTransaction("create"); + featureStore.setTransaction(transaction); // Long-running transaction +} + +@Override +public void write(Chunk chunk) { + ListFeatureCollection collection = new ListFeatureCollection(featureType, chunk.getItems()); + featureStore.addFeatures(collection); // Append chunk to shapefile + // chunk goes out of scope → GC eligible +} + +@AfterStep +public void afterStep() { + transaction.commit(); // Commit all chunks at once + transaction.close(); +} +``` + +**PostgreSQL Array Parameter Handling**: +```java +// InferenceResultItemReaderConfig uses PreparedStatementSetter +ps -> { + Array batchIdsArray = ps.getConnection().createArrayOf("bigint", batchIds.toArray()); + ps.setArray(1, batchIdsArray); // WHERE batch_id = ANY(?) 
+ ps.setString(2, mapId); +} +``` + +**Output Directory Strategy**: +- Batch mode (MERGED): `{output-base-dir}/{inference-id}/merge/` → Single merged shapefile + GeoJSON +- Batch mode (map_id partitioning): `{output-base-dir}/{inference-id}/{map-id}/` → Per-map_id files +- Legacy mode: `{output-base-dir}/{inference-id}/{map-id}/` (no merge folder) + +**GeoServer Registration**: +- Only shapefile ZIP is uploaded (GeoJSON not registered) +- Requires pre-created workspace 'cd' and environment variables for auth +- Conditional execution via JobParameter `geoserver.enabled` +- Non-blocking: failures logged but don't stop batch job ## Configuration -Configuration files are located in `src/main/resources/`: -- `application.yml`: Base configuration (sets active profile) -- `application-prod.yml`: Production database and converter settings -- `application-dev.yml`: Development settings -- `application-local.yml`: Local development settings +### Profile System +- Default profile: `prod` (set in application.yml) +- Configuration hierarchy: `application.yml` → `application-{profile}.yml` +- Override via: `--spring.profiles.active=dev` -### Converter Configuration +### Key Configuration Properties + +**Converter Settings** (`ConverterProperties.java`): ```yaml converter: - inference-id: 'D5E46F60FC40B1A8BE0CD1F3547AA6' - map-ids: [] # Optional: list of map_ids, or empty for merged mode - batch-ids: [252, 253, 257] # Required: batch ID filter - mode: 'MERGED' # Optional: MERGED, MAP_IDS, or RESOLVE + inference-id: 'D5E46F60FC40B1A8BE0CD1F3547AA6' # Output folder name + batch-ids: [252, 253, 257] # PostgreSQL batch_id filter (required) + map-ids: [] # Legacy mode only (ignored in batch mode) + mode: 'MERGED' # Legacy mode only: MERGED, MAP_IDS, or RESOLVE output-base-dir: '/data/model_output/export/' - crs: 'EPSG:5186' # Korean 2000 / Central Belt CRS + crs: 'EPSG:5186' # Korean 2000 / Central Belt + + batch: + chunk-size: 1000 # Records per chunk (affects memory usage) + 
fetch-size: 1000 # JDBC cursor fetch size + skip-limit: 100 # Max skipped records per step before the step fails + enable-partitioning: false # Future: parallel map_id processing ``` -### GeoServer Configuration +**GeoServer Settings** (`GeoServerProperties.java`): ```yaml geoserver: base-url: 'https://kamco.geo-dev.gs.dabeeo.com/geoserver' - workspace: 'cd' - overwrite-existing: true - connection-timeout: 30000 - read-timeout: 60000 - username: 'admin' # Optional: prefer environment variables - password: 'geoserver' # Optional: prefer environment variables + workspace: 'cd' # Must be pre-created in GeoServer + overwrite-existing: true # Delete existing layer before registration + connection-timeout: 30000 # 30 seconds + read-timeout: 60000 # 60 seconds + # Credentials from environment variables (preferred): + # GEOSERVER_USERNAME, GEOSERVER_PASSWORD +``` + +**Spring Batch Metadata**: +```yaml +spring: + batch: + job: + enabled: false # Prevent auto-run on startup + jdbc: + initialize-schema: always # Auto-create BATCH_* tables ``` ## Database Integration -### Query Pattern -All queries filter by `batch_id = ANY(?)` and include `after_c IS NOT NULL AND after_p IS NOT NULL` to ensure data quality. +### Query Strategies -Primary queries: -- `findByMapId(batchIds, mapId)`: Retrieve records for a specific map_id -- `findByBatchIds(batchIds)`: Retrieve all records for batch_ids (merged mode) -- `findMapIdByBatchIds(batchIds)`: Query distinct map_ids for RESOLVE mode +**Spring Batch Mode** (streaming): +```sql +-- InferenceResultItemReaderConfig.java +SELECT uid, map_id, probability, before_year, after_year, + before_c, before_p, after_c, after_p, + ST_AsText(geometry) as geometry_wkt +FROM inference_results_testing +WHERE batch_id = ANY(?) 
+ AND ST_GeometryType(geometry) IN ('ST_Polygon', 'ST_MultiPolygon') + AND ST_SRID(geometry) = 5186 + AND ST_X(ST_Centroid(geometry)) BETWEEN 125000 AND 530000 + AND ST_Y(ST_Centroid(geometry)) BETWEEN -600000 AND 988000 + AND ST_IsValid(geometry) = true +ORDER BY map_id, uid +-- Uses server-side cursor with fetch-size=1000 +``` + +**Legacy Mode** (full load): +```sql +-- InferenceResultRepository.java +SELECT uid, map_id, probability, before_year, after_year, + before_c, before_p, after_c, after_p, + ST_AsText(geometry) as geometry_wkt +FROM inference_results_testing +WHERE batch_id = ANY(?) AND map_id = ? +-- Returns full List in memory +``` + +**Geometry Type Validation**: +```sql +-- GeometryTypeValidationTasklet.java +SELECT DISTINCT ST_GeometryType(geometry) +FROM inference_results_testing +WHERE batch_id = ANY(?) AND geometry IS NOT NULL +-- Pre-validates homogeneous geometry requirement +``` ### Field Mapping -Database columns map to shapefile fields (note: shapefile field names limited to 10 characters): +Database columns map to shapefile fields (10-character limit): -| Database Column | DB Type | Shapefile Field | Shapefile Type | -|-----------------|---------|-----------------|----------------| -| uid | uuid | uid | String | -| map_id | text | map_id | String | -| probability | float8 | chn_dtct_p | String | -| before_year | bigint | cprs_yr | Long | -| after_year | bigint | crtr_yr | Long | -| before_c | text | bf_cls_cd | String | -| before_p | float8 | bf_cls_pro | String | -| after_c | text | af_cls_cd | String | -| after_p | float8 | af_cls_pro | String | -| geometry | geom | the_geom | Polygon | +| Database Column | DB Type | Shapefile Field | Shapefile Type | Notes | +|-----------------|---------|-----------------|----------------|-------| +| uid | uuid | chnDtctId | String | Change detection ID | +| map_id | text | mpqd_no | String | Map quadrant number | +| probability | float8 | chn_dtct_p | Double | Change detection probability | +| 
before_year | bigint | cprs_yr | Long | Comparison year | +| after_year | bigint | crtr_yr | Long | Criteria year | +| before_c | text | bf_cls_cd | String | Before classification code | +| before_p | float8 | bf_cls_pro | Double | Before classification probability | +| after_c | text | af_cls_cd | String | After classification code | +| after_p | float8 | af_cls_pro | Double | After classification probability | +| geometry | geom | the_geom | Polygon | Geometry in EPSG:5186 | -**Note**: Probability and classification probability fields are stored as Strings in shapefiles (converted via `String.valueOf()`) to preserve precision. +**Field name source**: See `FeatureTypeFactory.java` (batch/util/FeatureTypeFactory.java:1-104) ### Coordinate Reference System -All geometries use **EPSG:5186** (Korean 2000 / Central Belt). The PostGIS geometry column is `geometry(Polygon, 5186)`, and this CRS is encoded in the output shapefile's `.prj` file via GeoTools. +- **CRS**: EPSG:5186 (Korean 2000 / Central Belt) +- **Valid Coordinate Bounds**: X ∈ [125km, 530km], Y ∈ [-600km, 988km] +- **Encoding**: WKT in SQL → JTS Geometry → GeoTools SimpleFeature → `.prj` file +- **Validation**: Automatic in batch mode via `ST_X(ST_Centroid())` range check ## Dependencies -Key libraries: -- **Spring Boot 3.5.7**: Framework (DI, JDBC, web for RestTemplate) -- **GeoTools 30.0**: Shapefile and GeoJSON generation (`gt-shapefile`, `gt-referencing`, `gt-epsg-hsql`, `gt-geojson`) -- **JTS 1.19.0**: Java Topology Suite for geometry representation -- **PostGIS JDBC 2.5.1**: PostgreSQL spatial extension support -- **PostgreSQL JDBC Driver**: Database connectivity -- **HikariCP**: Connection pooling +**Core Framework**: +- Spring Boot 3.5.7 + - `spring-boot-starter`: DI container, logging + - `spring-boot-starter-jdbc`: JDBC template, HikariCP + - `spring-boot-starter-batch`: Spring Batch framework, job repository + - `spring-boot-starter-web`: RestTemplate for GeoServer API calls + - 
`spring-boot-starter-validation`: @NotBlank annotations -**Important**: `javax.media:jai_core` is globally excluded in `build.gradle` to avoid conflicts with GeoTools. +**Spatial Libraries**: +- GeoTools 30.0 (via OSGeo repository) + - `gt-shapefile`: Shapefile I/O (DataStore, FeatureStore, Transaction) + - `gt-geojson`: GeoJSON encoding/decoding + - `gt-referencing`: CRS transformations + - `gt-epsg-hsql`: EPSG database for CRS lookups +- JTS 1.19.0: Geometry primitives (Polygon, MultiPolygon, GeometryFactory) +- PostGIS JDBC 2.5.1: PostGIS geometry type support + +**Database**: +- PostgreSQL JDBC Driver (latest) +- HikariCP (bundled with Spring Boot) + +**Build Configuration**: +```gradle +// build.gradle +configurations.all { + exclude group: 'javax.media', module: 'jai_core' // Conflicts with GeoTools +} + +bootJar { + archiveFileName = "shp-exporter.jar" // Fixed JAR name +} + +spotless { + java { + googleJavaFormat('1.19.2') // 2-space indentation + } +} +``` + +## Development Patterns + +### Adding a New Step to Spring Batch Job + +When adding steps to `mergedModeJob`, follow this pattern: + +1. **Create Tasklet or ItemWriter** in `batch/tasklet/` or `batch/writer/` +2. **Define Step Bean** in `MergedModeJobConfig.java`: +```java +@Bean +public Step myNewStep(JobRepository jobRepository, + PlatformTransactionManager transactionManager, + MyTasklet tasklet, + BatchExecutionHistoryListener historyListener) { + return new StepBuilder("myNewStep", jobRepository) + .tasklet(tasklet, transactionManager) + .listener(historyListener) // REQUIRED for history tracking + .build(); +} +``` +3. **Add to Job Flow** in `mergedModeJob()`: +```java +.next(myNewStep) +``` +4. **Always include `BatchExecutionHistoryListener`** to track execution metrics + +### Modifying ItemReader Configuration + +ItemReaders are **not thread-safe**. Each step requires its own instance: + +```java +// WRONG: Sharing reader between steps +@Bean +public JdbcCursorItemReader reader() { ... 
} + + // RIGHT: Separate readers with @StepScope +@Bean +@StepScope // Creates new instance per step +public JdbcCursorItemReader shapefileReader() { ... } + +@Bean +@StepScope +public JdbcCursorItemReader geoJsonReader() { ... } +``` + +See `InferenceResultItemReaderConfig.java` for working examples. + +### Streaming Writers Pattern + +When writing custom streaming writers, follow `StreamingShapefileWriter` pattern: + +```java +@Component +@StepScope +public class MyStreamingWriter implements ItemStreamWriter { + private Transaction transaction; + + @Override + public void open(ExecutionContext context) { + // Open resources, start transaction + transaction = new DefaultTransaction("create"); + } + + @Override + public void write(Chunk chunk) { + // Write chunk incrementally + // Do NOT accumulate in memory + } + + @AfterStep + public ExitStatus afterStep(StepExecution stepExecution) { + transaction.commit(); // Commit all chunks + transaction.close(); + return ExitStatus.COMPLETED; + } +} +``` + +### JobParameters and StepExecutionContext + +**Pass data between steps** using the job-level `ExecutionContext` (a step-level context is not visible to later steps unless promoted, e.g. via `ExecutionContextPromotionListener`): + +```java +// Step 1: Store data in the job-level context so later steps can read it +stepExecution.getJobExecution().getExecutionContext().putString("geometryType", "ST_Polygon"); + +// Step 2: Retrieve data +@BeforeStep +public void beforeStep(StepExecution stepExecution) { + String geomType = stepExecution.getJobExecution() + .getExecutionContext() + .getString("geometryType"); +} +``` + +**Job-level parameters** from command line: +```java +// ConverterCommandLineRunner.buildJobParameters() +JobParametersBuilder builder = new JobParametersBuilder(); +builder.addString("inferenceId", converterProperties.getInferenceId()); +builder.addLong("timestamp", System.currentTimeMillis()); // Ensures uniqueness +``` + +### Partitioning Pattern (Map ID Processing) + +The `generateMapIdFilesStep` uses partitioning but runs **sequentially** to avoid DB connection pool exhaustion: + +```java +@Bean +public Step generateMapIdFilesStep(...) 
{ + return new StepBuilder("generateMapIdFilesStep", jobRepository) + .partitioner("mapIdWorker", partitioner) + .step(mapIdWorkerStep) + .taskExecutor(new SyncTaskExecutor()) // SEQUENTIAL execution + .build(); +} +``` + +For parallel execution in future (requires connection pool tuning): +```java +.taskExecutor(new SimpleAsyncTaskExecutor()) +.gridSize(4) // 4 concurrent workers +``` + +### GeoServer REST API Integration + +GeoServer operations use `RestTemplate` with custom error handling: + +```java +// GeoServerRegistrationService.java +try { + restTemplate.exchange(url, HttpMethod.PUT, entity, String.class); +} catch (HttpClientErrorException e) { + if (e.getStatusCode() == HttpStatus.NOT_FOUND) { + // Handle workspace not found + } +} +``` + +Always check workspace existence before layer registration. + +### Testing Considerations + +- **Unit tests**: Mock `JdbcTemplate`, `DataSource` for repository tests +- **Integration tests**: Use `@SpringBatchTest` with embedded H2 database +- **GeoTools**: Use `MemoryDataStore` for shapefile writer tests +- **Current state**: Limited test coverage (focus on critical path validation) + +Refer to `claudedocs/SPRING_BATCH_MIGRATION.md` for detailed batch architecture documentation. 
diff --git a/shp-exporter/README.md b/shp-exporter/README.md index d34cea6..e83ddf3 100755 --- a/shp-exporter/README.md +++ b/shp-exporter/README.md @@ -111,7 +111,7 @@ java -jar build/libs/shp-exporter.jar \ java -jar build/libs/shp-exporter.jar \ --batch \ - --converter.inference-id=test22 \ + --converter.inference-id=test009 \ --converter.batch-ids[0]=111 \ --converter.batch-ids[1]=114 \ --converter.batch-ids[2]=162 \ diff --git a/shp-exporter/src/main/java/com/kamco/makesample/batch/tasklet/GeoServerRegistrationTasklet.java b/shp-exporter/src/main/java/com/kamco/makesample/batch/tasklet/GeoServerRegistrationTasklet.java index 7d14ccf..9b1cdeb 100644 --- a/shp-exporter/src/main/java/com/kamco/makesample/batch/tasklet/GeoServerRegistrationTasklet.java +++ b/shp-exporter/src/main/java/com/kamco/makesample/batch/tasklet/GeoServerRegistrationTasklet.java @@ -1,6 +1,7 @@ package com.kamco.makesample.batch.tasklet; import com.kamco.makesample.service.GeoServerRegistrationService; +import java.io.File; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.StepContribution; @@ -16,6 +17,9 @@ import org.springframework.stereotype.Component; * *

기존 GeoServerRegistrationService를 재사용하여 shapefile을 GeoServer에 등록 * + *

자동 선택 로직(현재 비활성화 - 주석 처리됨): - 파일 크기 < 100MB: REST API 업로드 (uploadShapefileZip) - 파일 크기 >= 100MB: 파일 경로 참조 + * (registerShapefileByPath). 현재는 파일 크기와 무관하게 항상 registerShapefileByPath 를 호출 + * *

Conditional execution: geoserver.enabled=false 이면 skip */ @Component @@ -24,6 +28,8 @@ public class GeoServerRegistrationTasklet implements Tasklet { private static final Logger log = LoggerFactory.getLogger(GeoServerRegistrationTasklet.class); + private static final long FILE_SIZE_THRESHOLD = 100 * 1024 * 1024; // 100MB + private final GeoServerRegistrationService geoServerService; @Value("#{jobParameters['geoserver.enabled'] ?: false}") @@ -62,8 +68,29 @@ public class GeoServerRegistrationTasklet implements Tasklet { throw new IllegalStateException("ZIP file path not available for GeoServer registration"); } - // 기존 GeoServerRegistrationService 재사용 - geoServerService.uploadShapefileZip(zipPath, layerName); + // Check file size to determine registration method + File zipFile = new File(zipPath); + long fileSize = zipFile.length(); + long fileSizeMB = fileSize / 1024 / 1024; + + log.info("ZIP file size: {} bytes ({} MB)", fileSize, fileSizeMB); +// +// if (fileSize < FILE_SIZE_THRESHOLD) { +// // Small file: Use REST API upload +// log.info("Using REST API upload method (file size < 100MB)"); +// geoServerService.uploadShapefileZip(zipPath, layerName); +// } else { +// // Large file: Use file path reference +// log.info( +// "Using file path reference method (file size >= 100MB, {} MB recommended for large" +// + " files)", +// fileSizeMB); +// log.info( +// "GeoServer will read the file from: {} (ensure GeoServer has file system access)", +// zipPath); +// geoServerService.registerShapefileByPath(zipPath, layerName); +// } + geoServerService.registerShapefileByPath(zipPath, layerName); log.info("GeoServer registration completed successfully for layer: {}", layerName); diff --git a/shp-exporter/src/main/java/com/kamco/makesample/cli/ConverterCommandLineRunner.java b/shp-exporter/src/main/java/com/kamco/makesample/cli/ConverterCommandLineRunner.java index 8cfa7b4..4542d78 100755 --- 
a/shp-exporter/src/main/java/com/kamco/makesample/cli/ConverterCommandLineRunner.java +++ b/shp-exporter/src/main/java/com/kamco/makesample/cli/ConverterCommandLineRunner.java @@ -190,35 +190,56 @@ public class ConverterCommandLineRunner implements CommandLineRunner { return; } - String filePath = firstOption(appArgs, "upload-shp"); - String layerName = firstOption(appArgs, "layer"); + // Check for file path reference method (recommended for large files) + String filePathReference = firstOption(appArgs, "register-by-path"); + boolean usePathReference = filePathReference != null && !filePathReference.isBlank(); - if (filePath == null || filePath.isBlank()) { - log.info("No upload requested. Use --upload-shp option to upload a shapefile."); + // Check for upload method (traditional, file size limited) + String uploadFilePath = firstOption(appArgs, "upload-shp"); + boolean useUpload = uploadFilePath != null && !uploadFilePath.isBlank(); + + if (!usePathReference && !useUpload) { + log.info("No upload or registration requested."); printUsage(); return; } + if (usePathReference && useUpload) { + log.error("Cannot use both --upload-shp and --register-by-path at the same time."); + log.error("Choose one method:"); + log.error(" --upload-shp: Upload file content via REST API (< 100MB)"); + log.error(" --register-by-path: Reference file path (500MB+)"); + System.exit(1); + } + + String filePath = usePathReference ? filePathReference : uploadFilePath; + String layerName = firstOption(appArgs, "layer"); + if (layerName == null || layerName.isBlank()) { String fileName = Paths.get(filePath).getFileName().toString(); layerName = fileName.replaceAll("(?i)\\.(zip|shp)$", ""); // 대소문자도 처리 } log.info("========================================"); - log.info("Shapefile Upload to GeoServer"); + log.info("Shapefile {} to GeoServer", usePathReference ? "Registration" : "Upload"); log.info("========================================"); + log.info("Method: {}", usePathReference ? 
"File Path Reference" : "REST API Upload"); log.info("Input File: {}", filePath); log.info("Layer Name: {}", layerName); log.info("========================================"); try { - geoServerService.uploadShapefileZip(filePath, layerName); + if (usePathReference) { + geoServerService.registerShapefileByPath(filePath, layerName); + } else { + geoServerService.uploadShapefileZip(filePath, layerName); + } log.info("========================================"); - log.info("Upload completed successfully!"); + log.info("{} completed successfully!", usePathReference ? "Registration" : "Upload"); log.info("========================================"); } catch (Exception e) { log.error("========================================"); - log.error("Upload failed: {}", e.getMessage(), e); + log.error("{} failed: {}", usePathReference ? "Registration" : "Upload", e.getMessage(), e); log.error("========================================"); throw e; } @@ -263,21 +284,44 @@ public class ConverterCommandLineRunner implements CommandLineRunner { System.out.println(); System.out.println("Options:"); System.out.println( - " --upload-shp Upload shapefile to GeoServer (.shp or .zip)"); + " --upload-shp Upload shapefile via REST API (< 100MB)"); System.out.println( - " --layer Specify layer name (optional, defaults to filename)"); - System.out.println(" --help, -h Show this help message"); + " --register-by-path Register shapefile using file path (500MB+)"); + System.out.println( + " --layer Specify layer name (optional, defaults to filename)"); + System.out.println(" --help, -h Show this help message"); + System.out.println(); + System.out.println("GeoServer Registration Methods:"); + System.out.println(); + System.out.println(" 1. 
REST API Upload (--upload-shp):"); + System.out.println(" - Uploads file content to GeoServer via HTTP"); + System.out.println(" - File size limit: < 100MB (HTTP payload limit)"); + System.out.println(" - Use for small to medium files"); + System.out.println(); + System.out.println(" 2. File Path Reference (--register-by-path):"); + System.out.println(" - GeoServer reads file from its local file system"); + System.out.println(" - No file size limit (supports 500MB ~ 2GB+)"); + System.out.println(" - Requirements:"); + System.out.println(" * GeoServer must have file system access to the path"); + System.out.println(" * Path must be absolute (e.g., /data/model_output/...)"); + System.out.println(" * File must be readable by GeoServer user"); System.out.println(); System.out.println("Examples:"); - System.out.println(" # Upload ZIP file directly"); - System.out.println(" java -jar shp-exporter.jar --upload-shp /path/to/shapefile.zip"); System.out.println(); - System.out.println(" # Upload .shp file (will auto-create ZIP with related files)"); - System.out.println(" java -jar shp-exporter.jar --upload-shp /path/to/shapefile.shp"); + System.out.println(" # Small file (< 100MB): Upload via REST API"); + System.out.println(" java -jar shp-exporter.jar --upload-shp /path/to/small_file.zip"); System.out.println(); - System.out.println(" # Specify custom layer name"); + System.out.println(" # Large file (500MB+): Register by file path"); System.out.println( - " java -jar shp-exporter.jar --upload-shp /path/to/shapefile.shp --layer my_layer"); + " java -jar shp-exporter.jar --register-by-path" + + " /data/model_output/export/inference_id/merge/large_file.zip"); + System.out.println(); + System.out.println(" # With custom layer name"); + System.out.println( + " java -jar shp-exporter.jar --register-by-path /path/to/file.zip --layer my_layer"); + System.out.println(); + System.out.println(" # Auto-create ZIP from .shp file (upload method)"); + System.out.println(" java -jar 
shp-exporter.jar --upload-shp /path/to/shapefile.shp"); System.out.println(); } } diff --git a/shp-exporter/src/main/java/com/kamco/makesample/service/GeoServerRegistrationService.java b/shp-exporter/src/main/java/com/kamco/makesample/service/GeoServerRegistrationService.java index c39100d..af6b208 100644 --- a/shp-exporter/src/main/java/com/kamco/makesample/service/GeoServerRegistrationService.java +++ b/shp-exporter/src/main/java/com/kamco/makesample/service/GeoServerRegistrationService.java @@ -38,12 +38,21 @@ public class GeoServerRegistrationService { this.properties = properties; } + /** + * Register shapefile to GeoServer by uploading file content (REST API) + * + *

LIMITATION: File size limited by HTTP request size (typically < 100MB) Use + * registerShapefileByPath() for larger files (500MB+) + * + * @param filePath Path to shapefile (.shp or .zip) + * @param layerName GeoServer layer name + */ public void uploadShapefileZip(String filePath, String layerName) { String zipFilePath = filePath; boolean tempZipCreated = false; try { - log.info("Starting shapefile upload to GeoServer"); + log.info("Starting shapefile upload to GeoServer (REST API upload)"); log.info("Input file: {}", filePath); log.info("Layer name: {}", layerName); log.info("Workspace: {}", properties.getWorkspace()); @@ -59,6 +68,18 @@ public class GeoServerRegistrationService { log.info("Temporary ZIP created: {}", zipFilePath); } + // Check file size and warn if too large + Path path = Paths.get(zipFilePath); + long fileSize = Files.size(path); + log.info("ZIP file size: {} bytes ({} MB)", fileSize, fileSize / 1024 / 1024); + + if (fileSize > 100 * 1024 * 1024) { // 100MB + log.warn( + "WARNING: File size ({} MB) may exceed HTTP upload limits. Consider using" + + " registerShapefileByPath() for files > 100MB", + fileSize / 1024 / 1024); + } + // Check if layer exists and handle overwrite if (properties.isOverwriteExisting() && layerExists(layerName)) { log.info("Layer '{}' already exists. Deleting...", layerName); @@ -66,9 +87,7 @@ public class GeoServerRegistrationService { } // Read ZIP file - Path path = Paths.get(zipFilePath); byte[] zipData = Files.readAllBytes(path); - log.info("ZIP file size: {} bytes", zipData.length); // Upload to GeoServer String url = @@ -102,6 +121,24 @@ public class GeoServerRegistrationService { "GeoServer upload failed. 
Status: {}, Response: {}", e.getStatusCode(), e.getResponseBodyAsString()); + + // Provide helpful message for 413 Payload Too Large + if (e.getStatusCode() == HttpStatus.PAYLOAD_TOO_LARGE) { + log.error(""); + log.error("========================================"); + log.error("ERROR: File size exceeds GeoServer upload limit (HTTP 413)"); + log.error(""); + log.error("Solution: Use file path reference method instead:"); + log.error(" 1. Copy shapefile to GeoServer data directory"); + log.error(" 2. Use registerShapefileByPath() method"); + log.error(""); + log.error("Or increase GeoServer upload limits:"); + log.error(" - Tomcat: maxPostSize in server.xml"); + log.error(" - Nginx: client_max_body_size"); + log.error("========================================"); + log.error(""); + } + throw new RuntimeException("GeoServer upload failed", e); } catch (Exception e) { log.error("Unexpected error during shapefile upload", e); @@ -119,6 +156,118 @@ public class GeoServerRegistrationService { } } + /** + * Register shapefile to GeoServer using file:// URL (for large files 500MB+) + * + *

This method does NOT upload file content. Instead, it tells GeoServer to read the file from + * its local file system. + * + *

Requirements: - GeoServer must have file system access to the shapefile path - The path must + * be absolute and accessible from GeoServer server + * + * @param absoluteFilePath Absolute file path to shapefile (.shp or .zip) on GeoServer server + * @param layerName GeoServer layer name + */ + public void registerShapefileByPath(String absoluteFilePath, String layerName) { + try { + log.info("Starting shapefile registration to GeoServer (file path reference)"); + log.info("Input file path: {}", absoluteFilePath); + log.info("Layer name: {}", layerName); + log.info("Workspace: {}", properties.getWorkspace()); + + // Validate inputs + if (absoluteFilePath == null || absoluteFilePath.trim().isEmpty()) { + throw new IllegalArgumentException("File path cannot be empty"); + } + + if (layerName == null || layerName.trim().isEmpty()) { + throw new IllegalArgumentException("Layer name cannot be empty"); + } + + // Verify file exists + File file = new File(absoluteFilePath); + if (!file.exists()) { + throw new IllegalArgumentException("File does not exist: " + absoluteFilePath); + } + + if (!file.isAbsolute()) { + throw new IllegalArgumentException("File path must be absolute: " + absoluteFilePath); + } + + String lowerPath = absoluteFilePath.toLowerCase(); + if (!lowerPath.endsWith(".zip") && !lowerPath.endsWith(".shp")) { + throw new IllegalArgumentException("File must be a .zip or .shp file: " + absoluteFilePath); + } + + log.info("File size: {} MB", file.length() / 1024 / 1024); + + // Check if layer exists and handle overwrite + if (properties.isOverwriteExisting() && layerExists(layerName)) { + log.info("Layer '{}' already exists. 
Deleting...", layerName); + deleteLayer(layerName); + } + + // Construct file:// URL + String fileUrl = "file://" + absoluteFilePath; + log.info("Using file URL: {}", fileUrl); + + // GeoServer REST API endpoint + String url = + String.format( + "%s/rest/workspaces/%s/datastores/%s/file.shp?configure=all", + properties.getBaseUrl(), properties.getWorkspace(), layerName); + + HttpHeaders headers = createHeaders(); + headers.setContentType(MediaType.TEXT_PLAIN); + + // Send file:// URL as request body + HttpEntity request = new HttpEntity<>(fileUrl, headers); + + log.info("Registering shapefile to GeoServer: {}", url); + ResponseEntity response = + restTemplate.exchange(url, HttpMethod.PUT, request, String.class); + + if (response.getStatusCode() == HttpStatus.CREATED + || response.getStatusCode() == HttpStatus.OK) { + log.info("Shapefile registered successfully to GeoServer"); + log.info( + "Layer '{}' is now available in workspace '{}'", layerName, properties.getWorkspace()); + log.info("GeoServer will read data from: {}", absoluteFilePath); + } else { + log.warn("Unexpected response status: {}", response.getStatusCode()); + } + + } catch (HttpClientErrorException e) { + log.error( + "GeoServer registration failed. Status: {}, Response: {}", + e.getStatusCode(), + e.getResponseBodyAsString()); + + if (e.getStatusCode() == HttpStatus.INTERNAL_SERVER_ERROR) { + log.error(""); + log.error("========================================"); + log.error("ERROR: GeoServer cannot access the file path"); + log.error(""); + log.error("Possible causes:"); + log.error(" 1. File path is not accessible from GeoServer server"); + log.error(" 2. GeoServer user lacks read permissions"); + log.error(" 3. File path format is incorrect (must be absolute path)"); + log.error(""); + log.error("Solutions:"); + log.error(" 1. Verify GeoServer has file system access to: {}", absoluteFilePath); + log.error(" 2. Check file permissions (chmod 644 or similar)"); + log.error(" 3. 
Ensure path is absolute and correctly formatted"); + log.error("========================================"); + log.error(""); + } + + throw new RuntimeException("GeoServer registration failed", e); + } catch (Exception e) { + log.error("Unexpected error during shapefile registration", e); + throw new RuntimeException("Shapefile registration failed", e); + } + } + private void validateInputs(String filePath, String layerName) { if (filePath == null || filePath.trim().isEmpty()) { throw new IllegalArgumentException("File path cannot be empty");