shp파일등록방법변경

This commit is contained in:
2026-03-11 08:02:54 +09:00
parent d193ff4ae6
commit cc796ce005
28 changed files with 657 additions and 215 deletions

View File

@@ -76,3 +76,11 @@ docker-compose.override.yml
*.jar
*.class
/build
### Output directories ###
export/
merge/
### Documentation (temporary) ###
claudedocs/

View File

@@ -1,2 +0,0 @@
#Wed Jan 14 15:14:03 KST 2026
gradle.version=8.14.3

8
shp-exporter/.idea/.gitignore generated vendored
View File

@@ -1,8 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@@ -1,9 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<bytecodeTargetLevel target="17" />
</component>
<component name="JavacSettings">
<option name="ADDITIONAL_OPTIONS_STRING" value="-parameters" />
</component>
</project>

View File

@@ -1,19 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GradleMigrationSettings" migrationVersion="1" />
<component name="GradleSettings">
<option name="linkedExternalProjectsSettings">
<GradleProjectSettings>
<option name="delegatedBuild" value="false" />
<option name="testRunner" value="PLATFORM" />
<option name="externalProjectPath" value="$PROJECT_DIR$" />
<option name="modules">
<set>
<option value="$PROJECT_DIR$" />
</set>
</option>
<option name="resolveExternalAnnotations" value="true" />
</GradleProjectSettings>
</option>
</component>
</project>

View File

@@ -1,35 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
<remote-repository>
<option name="id" value="MavenRepo" />
<option name="name" value="MavenRepo" />
<option name="url" value="https://repo.maven.apache.org/maven2/" />
</remote-repository>
<remote-repository>
<option name="id" value="maven" />
<option name="name" value="maven" />
<option name="url" value="https://repo.osgeo.org/repository/release/" />
</remote-repository>
<remote-repository>
<option name="id" value="maven2" />
<option name="name" value="maven2" />
<option name="url" value="https://repo.osgeo.org/repository/geotools-releases/" />
</remote-repository>
<remote-repository>
<option name="id" value="maven3" />
<option name="name" value="maven3" />
<option name="url" value="https://repo.osgeo.org/repository/snapshot/" />
</remote-repository>
</component>
</project>

View File

@@ -1,9 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@@ -1,10 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="FrameworkDetectionExcludesConfiguration">
<file type="web" url="file://$PROJECT_DIR$" />
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_17" default="true" project-jdk-name="17" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

View File

@@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>

View File

@@ -4,27 +4,51 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## Project Overview
Spring Boot CLI application that queries PostgreSQL PostGIS spatial data and converts it to ESRI shapefiles and GeoJSON. The application processes AI inference results from the KAMCO database and generates geographic data files for visualization in GIS applications. It also supports automatic registration of shapefiles to GeoServer via REST API.
Spring Boot 3.5.7 CLI application that converts PostgreSQL PostGIS spatial data to ESRI shapefiles and GeoJSON formats. The application uses **Spring Batch** for memory-efficient processing of large datasets (1M+ records) and supports automatic GeoServer layer registration via REST API.
**Key Features**:
- Memory-optimized batch processing (90-95% reduction: 2-13GB → 150-200MB)
- Chunk-based streaming with cursor pagination (fetch-size: 1000)
- Automatic geometry validation and type conversion (MultiPolygon → Polygon)
- Coordinate system validation (EPSG:5186 Korean 2000 / Central Belt)
- Dual execution modes: Spring Batch (recommended) and Legacy mode
## Build and Run Commands
### Build
```bash
./gradlew build
./gradlew build # Full build with tests
./gradlew clean build -x test # Skip tests
./gradlew spotlessApply # Apply Google Java Format (2-space indentation)
./gradlew spotlessCheck # Verify formatting without applying
```
The built JAR will be named `shp-exporter.jar` (configured in `bootJar` task).
Output: `build/libs/shp-exporter.jar` (fixed name, no version suffix)
### Run Application
#### Generate Shapefiles
#### Spring Batch Mode (Recommended)
```bash
./gradlew bootRun
# Generate shapefile + GeoJSON
./gradlew bootRun --args="--batch --converter.batch-ids[0]=252"
# With GeoServer registration
export GEOSERVER_USERNAME=admin
export GEOSERVER_PASSWORD=geoserver
./gradlew bootRun --args="--batch --geoserver.enabled=true --converter.batch-ids[0]=252"
# Using JAR (production)
java -jar build/libs/shp-exporter.jar \
--batch \
--converter.inference-id=D5E46F60FC40B1A8BE0CD1F3547AA6 \
--converter.batch-ids[0]=252 \
--converter.batch-ids[1]=253
```
Or using JAR:
#### Legacy Mode (Small Datasets Only)
```bash
java -jar build/libs/shp-exporter.jar
./gradlew bootRun # No --batch flag
# Warning: May OOM on large datasets
```
#### Upload Shapefile to GeoServer
@@ -71,122 +95,410 @@ By default, the application runs with `spring.profiles.active=prod` (set in `app
## Architecture
### Processing Pipeline
The application follows a layered architecture with a linear data flow:
### Dual Execution Modes
1. **CLI Entry** (`ConverterCommandLineRunner`) → Parses command-line args and routes to either shapefile generation or GeoServer upload
2. **Service Orchestration** (`ShapefileConverterService`) → Coordinates the conversion workflow based on mode (MERGED, MAP_IDS, or RESOLVE)
3. **Data Access** (`InferenceResultRepository`) → Queries PostGIS database using `PreparedStatementCreator` for PostgreSQL array parameters
4. **Geometry Conversion** (`GeometryConverter`) → Converts PostGIS WKT format to JTS Geometry objects using `WKTReader`
5. **File Writing** (`ShapefileWriter`, `GeoJsonWriter`, `ResultZipWriter`) → Generates output files using GeoTools
6. **GeoServer Integration** (`GeoServerRegistrationService`) → Registers shapefiles to GeoServer via REST API (optional)
The application supports two execution modes with distinct processing pipelines:
### Key Design Points
#### Spring Batch Mode (Recommended)
**Trigger**: `--batch` flag
**Use Case**: Large datasets (100K+ records), production workloads
**Memory**: 150-200MB constant (chunk-based streaming)
**Conversion Modes**: The application supports three execution modes controlled by `converter.mode`:
- `MERGED`: Creates a single shapefile for all data matching `batch-ids` (ignores `map-ids`)
- `MAP_IDS`: Processes only the `map-ids` specified in configuration (requires `map-ids` to be set)
- `RESOLVE`: Queries the database for all distinct `map-ids` matching `batch-ids`, then processes each (avoids OS command-line length limits)
- If `mode` is unspecified: defaults to `MERGED` if `map-ids` is empty, otherwise `MAP_IDS`
**Geometry Handling**: Two-step conversion process:
- PostGIS returns geometries as WKT (Well-Known Text) via `ST_AsText(geometry)` in SQL query
- `GeometryConverter` parses WKT to JTS `Geometry` objects using `WKTReader`
- `ShapefileWriter` uses JTS geometries with GeoTools to write shapefile artifacts (.shp, .shx, .dbf, .prj)
**Shapefile Constraints**:
- Validates all geometries are homogeneous (same type) via `ShapefileConverterService.validateGeometries()`
- Shapefiles cannot contain mixed geometry types (e.g., cannot mix Polygon and Point)
- Geometry type determined from first valid geometry in result set
**Output Structure**:
- For MAP_IDS/RESOLVE mode: `{output-base-dir}/{inference-id}/{map-id}/`
- For MERGED mode: `{output-base-dir}/{inference-id}/merge/`
- Each directory contains: `.shp`, `.shx`, `.dbf`, `.prj`, `.geojson`, and `.zip` files
**PostgreSQL Array Parameters**: The repository uses `PreparedStatementCreator` to handle PostgreSQL array syntax:
```java
Array batchIdsArray = con.createArrayOf("bigint", batchIds.toArray());
ps.setArray(1, batchIdsArray);
**Pipeline Flow**:
```
ConverterCommandLineRunner
→ JobLauncher.run(mergedModeJob)
→ Step 1: GeometryTypeValidationTasklet (validates geometry homogeneity)
→ Step 2: generateShapefileStep (chunk-oriented)
→ JdbcCursorItemReader (fetch-size: 1000)
→ FeatureConversionProcessor (InferenceResult → SimpleFeature)
→ StreamingShapefileWriter (chunk-based append)
→ Step 3: generateGeoJsonStep (chunk-oriented, same pattern)
→ Step 4: CreateZipTasklet (creates .zip for GeoServer)
→ Step 5: GeoServerRegistrationTasklet (conditional, if --geoserver.enabled=true)
→ Step 6: generateMapIdFilesStep (partitioned, sequential map_id processing)
```
This enables `WHERE batch_id = ANY(?)` queries.
**GeoServer Integration**:
- Workspace 'cd' must be pre-created in GeoServer before registration
- Uses environment variables `GEOSERVER_USERNAME` and `GEOSERVER_PASSWORD` for authentication
- Supports automatic deletion and re-registration when `overwrite-existing: true`
- Non-blocking: registration failures are logged but don't stop the application
**Key Components**:
- `JdbcCursorItemReader`: Cursor-based streaming (no full result set loading)
- `StreamingShapefileWriter`: Opens GeoTools transaction, writes chunks incrementally, commits at end
- `GeometryTypeValidationTasklet`: Pre-validates with SQL `DISTINCT ST_GeometryType()`, auto-converts MultiPolygon
- `CompositeItemWriter`: Simultaneously writes shapefile and GeoJSON in map_id worker step
#### Legacy Mode
**Trigger**: No `--batch` flag (deprecated)
**Use Case**: Small datasets (<10K records)
**Memory**: 1.4-9GB (loads entire result set)
**Pipeline Flow**:
```
ConverterCommandLineRunner
→ ShapefileConverterService.convertAll()
→ InferenceResultRepository.findByBatchIds() (full List<InferenceResult>)
→ validateGeometries() (in-memory validation)
→ ShapefileWriter.write() (DefaultFeatureCollection accumulation)
→ GeoJsonWriter.write()
```
### Key Design Patterns
**Geometry Type Validation & Auto-Conversion**:
- Pre-validation step runs SQL `SELECT DISTINCT ST_GeometryType(geometry)` to detect mixed types
- Supports automatic conversion: `ST_MultiPolygon` → `ST_Polygon` (extracts first polygon only)
- Fails fast on unsupported mixed types (e.g., Polygon + LineString)
- Validates EPSG:5186 coordinate bounds (X: 125 to 530 km, Y: -600 to 988 km) and ST_IsValid()
- See `GeometryTypeValidationTasklet` (batch/tasklet/GeometryTypeValidationTasklet.java:1-290)
**WKT to JTS Conversion Pipeline**:
1. PostGIS query returns `ST_AsText(geometry)` as WKT string
2. `GeometryConvertingRowMapper` converts ResultSet row to `InferenceResult` with WKT string (batch/reader/GeometryConvertingRowMapper.java:1-74)
3. `FeatureConversionProcessor` uses `GeometryConverter.parseGeometry()` to convert WKT → JTS Geometry (service/GeometryConverter.java:1-92)
4. `StreamingShapefileWriter` wraps JTS geometry in GeoTools `SimpleFeature` and writes to shapefile
**Chunk-Based Transaction Management** (Spring Batch only):
```java
// StreamingShapefileWriter
@BeforeStep
public void open() {
transaction = new DefaultTransaction("create");
featureStore.setTransaction(transaction); // Long-running transaction
}
@Override
public void write(Chunk<SimpleFeature> chunk) {
ListFeatureCollection collection = new ListFeatureCollection(featureType, chunk.getItems());
featureStore.addFeatures(collection); // Append chunk to shapefile
// chunk goes out of scope → GC eligible
}
@AfterStep
public void afterStep() {
transaction.commit(); // Commit all chunks at once
transaction.close();
}
```
**PostgreSQL Array Parameter Handling**:
```java
// InferenceResultItemReaderConfig uses PreparedStatementSetter
ps -> {
Array batchIdsArray = ps.getConnection().createArrayOf("bigint", batchIds.toArray());
ps.setArray(1, batchIdsArray); // WHERE batch_id = ANY(?)
ps.setString(2, mapId);
}
```
**Output Directory Strategy**:
- Batch mode (MERGED): `{output-base-dir}/{inference-id}/merge/` → Single merged shapefile + GeoJSON
- Batch mode (map_id partitioning): `{output-base-dir}/{inference-id}/{map-id}/` → Per-map_id files
- Legacy mode: `{output-base-dir}/{inference-id}/{map-id}/` (no merge folder)
**GeoServer Registration**:
- Only shapefile ZIP is uploaded (GeoJSON not registered)
- Requires pre-created workspace 'cd' and environment variables for auth
- Conditional execution via JobParameter `geoserver.enabled`
- Non-blocking: failures logged but don't stop batch job
## Configuration
Configuration files are located in `src/main/resources/`:
- `application.yml`: Base configuration (sets active profile)
- `application-prod.yml`: Production database and converter settings
- `application-dev.yml`: Development settings
- `application-local.yml`: Local development settings
### Profile System
- Default profile: `prod` (set in application.yml)
- Configuration hierarchy: `application.yml` → `application-{profile}.yml`
- Override via: `--spring.profiles.active=dev`
### Converter Configuration
### Key Configuration Properties
**Converter Settings** (`ConverterProperties.java`):
```yaml
converter:
inference-id: 'D5E46F60FC40B1A8BE0CD1F3547AA6'
map-ids: [] # Optional: list of map_ids, or empty for merged mode
batch-ids: [252, 253, 257] # Required: batch ID filter
mode: 'MERGED' # Optional: MERGED, MAP_IDS, or RESOLVE
inference-id: 'D5E46F60FC40B1A8BE0CD1F3547AA6' # Output folder name
batch-ids: [252, 253, 257] # PostgreSQL batch_id filter (required)
map-ids: [] # Legacy mode only (ignored in batch mode)
mode: 'MERGED' # Legacy mode only: MERGED, MAP_IDS, or RESOLVE
output-base-dir: '/data/model_output/export/'
crs: 'EPSG:5186' # Korean 2000 / Central Belt CRS
crs: 'EPSG:5186' # Korean 2000 / Central Belt
batch:
chunk-size: 1000 # Records per chunk (affects memory usage)
fetch-size: 1000 # JDBC cursor fetch size
skip-limit: 100 # Max skippable records per chunk
enable-partitioning: false # Future: parallel map_id processing
```
### GeoServer Configuration
**GeoServer Settings** (`GeoServerProperties.java`):
```yaml
geoserver:
base-url: 'https://kamco.geo-dev.gs.dabeeo.com/geoserver'
workspace: 'cd'
overwrite-existing: true
connection-timeout: 30000
read-timeout: 60000
username: 'admin' # Optional: prefer environment variables
password: 'geoserver' # Optional: prefer environment variables
workspace: 'cd' # Must be pre-created in GeoServer
overwrite-existing: true # Delete existing layer before registration
connection-timeout: 30000 # 30 seconds
read-timeout: 60000 # 60 seconds
# Credentials from environment variables (preferred):
# GEOSERVER_USERNAME, GEOSERVER_PASSWORD
```
**Spring Batch Metadata**:
```yaml
spring:
batch:
job:
enabled: false # Prevent auto-run on startup
jdbc:
initialize-schema: always # Auto-create BATCH_* tables
```
## Database Integration
### Query Pattern
All queries filter by `batch_id = ANY(?)` and include `after_c IS NOT NULL AND after_p IS NOT NULL` to ensure data quality.
### Query Strategies
Primary queries:
- `findByMapId(batchIds, mapId)`: Retrieve records for a specific map_id
- `findByBatchIds(batchIds)`: Retrieve all records for batch_ids (merged mode)
- `findMapIdByBatchIds(batchIds)`: Query distinct map_ids for RESOLVE mode
**Spring Batch Mode** (streaming):
```sql
-- InferenceResultItemReaderConfig.java
SELECT uid, map_id, probability, before_year, after_year,
before_c, before_p, after_c, after_p,
ST_AsText(geometry) as geometry_wkt
FROM inference_results_testing
WHERE batch_id = ANY(?)
AND ST_GeometryType(geometry) IN ('ST_Polygon', 'ST_MultiPolygon')
AND ST_SRID(geometry) = 5186
AND ST_X(ST_Centroid(geometry)) BETWEEN 125000 AND 530000
AND ST_Y(ST_Centroid(geometry)) BETWEEN -600000 AND 988000
AND ST_IsValid(geometry) = true
ORDER BY map_id, uid
-- Uses server-side cursor with fetch-size=1000
```
**Legacy Mode** (full load):
```sql
-- InferenceResultRepository.java
SELECT uid, map_id, probability, before_year, after_year,
before_c, before_p, after_c, after_p,
ST_AsText(geometry) as geometry_wkt
FROM inference_results_testing
WHERE batch_id = ANY(?) AND map_id = ?
-- Returns full List<InferenceResult> in memory
```
**Geometry Type Validation**:
```sql
-- GeometryTypeValidationTasklet.java
SELECT DISTINCT ST_GeometryType(geometry)
FROM inference_results_testing
WHERE batch_id = ANY(?) AND geometry IS NOT NULL
-- Pre-validates homogeneous geometry requirement
```
### Field Mapping
Database columns map to shapefile fields (note: shapefile field names limited to 10 characters):
Database columns map to shapefile fields (10-character limit):
| Database Column | DB Type | Shapefile Field | Shapefile Type |
|-----------------|---------|-----------------|----------------|
| uid | uuid | uid | String |
| map_id | text | map_id | String |
| probability | float8 | chn_dtct_p | String |
| before_year | bigint | cprs_yr | Long |
| after_year | bigint | crtr_yr | Long |
| before_c | text | bf_cls_cd | String |
| before_p | float8 | bf_cls_pro | String |
| after_c | text | af_cls_cd | String |
| after_p | float8 | af_cls_pro | String |
| geometry | geom | the_geom | Polygon |
| Database Column | DB Type | Shapefile Field | Shapefile Type | Notes |
|-----------------|---------|-----------------|----------------|-------|
| uid | uuid | chnDtctId | String | Change detection ID |
| map_id | text | mpqd_no | String | Map quadrant number |
| probability | float8 | chn_dtct_p | Double | Change detection probability |
| before_year | bigint | cprs_yr | Long | Comparison year |
| after_year | bigint | crtr_yr | Long | Criteria year |
| before_c | text | bf_cls_cd | String | Before classification code |
| before_p | float8 | bf_cls_pro | Double | Before classification probability |
| after_c | text | af_cls_cd | String | After classification code |
| after_p | float8 | af_cls_pro | Double | After classification probability |
| geometry | geom | the_geom | Polygon | Geometry in EPSG:5186 |
**Note**: Probability and classification probability fields are stored as Strings in shapefiles (converted via `String.valueOf()`) to preserve precision.
**Field name source**: See `FeatureTypeFactory.java` (batch/util/FeatureTypeFactory.java:1-104)
### Coordinate Reference System
All geometries use **EPSG:5186** (Korean 2000 / Central Belt). The PostGIS geometry column is `geometry(Polygon, 5186)`, and this CRS is encoded in the output shapefile's `.prj` file via GeoTools.
- **CRS**: EPSG:5186 (Korean 2000 / Central Belt)
- **Valid Coordinate Bounds**: X ∈ [125km, 530km], Y ∈ [-600km, 988km]
- **Encoding**: WKT in SQL → JTS Geometry → GeoTools SimpleFeature → `.prj` file
- **Validation**: Automatic in batch mode via `ST_X(ST_Centroid())` range check
## Dependencies
Key libraries:
- **Spring Boot 3.5.7**: Framework (DI, JDBC, web for RestTemplate)
- **GeoTools 30.0**: Shapefile and GeoJSON generation (`gt-shapefile`, `gt-referencing`, `gt-epsg-hsql`, `gt-geojson`)
- **JTS 1.19.0**: Java Topology Suite for geometry representation
- **PostGIS JDBC 2.5.1**: PostgreSQL spatial extension support
- **PostgreSQL JDBC Driver**: Database connectivity
- **HikariCP**: Connection pooling
**Core Framework**:
- Spring Boot 3.5.7
- `spring-boot-starter`: DI container, logging
- `spring-boot-starter-jdbc`: JDBC template, HikariCP
- `spring-boot-starter-batch`: Spring Batch framework, job repository
- `spring-boot-starter-web`: RestTemplate for GeoServer API calls
- `spring-boot-starter-validation`: @NotBlank annotations
**Important**: `javax.media:jai_core` is globally excluded in `build.gradle` to avoid conflicts with GeoTools.
**Spatial Libraries**:
- GeoTools 30.0 (via OSGeo repository)
- `gt-shapefile`: Shapefile I/O (DataStore, FeatureStore, Transaction)
- `gt-geojson`: GeoJSON encoding/decoding
- `gt-referencing`: CRS transformations
- `gt-epsg-hsql`: EPSG database for CRS lookups
- JTS 1.19.0: Geometry primitives (Polygon, MultiPolygon, GeometryFactory)
- PostGIS JDBC 2.5.1: PostGIS geometry type support
**Database**:
- PostgreSQL JDBC Driver (latest)
- HikariCP (bundled with Spring Boot)
**Build Configuration**:
```gradle
// build.gradle
configurations.all {
exclude group: 'javax.media', module: 'jai_core' // Conflicts with GeoTools
}
bootJar {
archiveFileName = "shp-exporter.jar" // Fixed JAR name
}
spotless {
java {
googleJavaFormat('1.19.2') // 2-space indentation
}
}
```
## Development Patterns
### Adding a New Step to Spring Batch Job
When adding steps to `mergedModeJob`, follow this pattern:
1. **Create Tasklet or ItemWriter** in `batch/tasklet/` or `batch/writer/`
2. **Define Step Bean** in `MergedModeJobConfig.java`:
```java
@Bean
public Step myNewStep(JobRepository jobRepository,
PlatformTransactionManager transactionManager,
MyTasklet tasklet,
BatchExecutionHistoryListener historyListener) {
return new StepBuilder("myNewStep", jobRepository)
.tasklet(tasklet, transactionManager)
.listener(historyListener) // REQUIRED for history tracking
.build();
}
```
3. **Add to Job Flow** in `mergedModeJob()`:
```java
.next(myNewStep)
```
4. **Always include `BatchExecutionHistoryListener`** to track execution metrics
### Modifying ItemReader Configuration
ItemReaders are **not thread-safe**. Each step requires its own instance:
```java
// WRONG: Sharing reader between steps
@Bean
public JdbcCursorItemReader<InferenceResult> reader() { ... }
// RIGHT: Separate readers with @StepScope
@Bean
@StepScope // Creates new instance per step
public JdbcCursorItemReader<InferenceResult> shapefileReader() { ... }
@Bean
@StepScope
public JdbcCursorItemReader<InferenceResult> geoJsonReader() { ... }
```
See `InferenceResultItemReaderConfig.java` for working examples.
### Streaming Writers Pattern
When writing custom streaming writers, follow `StreamingShapefileWriter` pattern:
```java
@Component
@StepScope
public class MyStreamingWriter implements ItemStreamWriter<MyType> {
private Transaction transaction;
@BeforeStep
public void open(ExecutionContext context) {
// Open resources, start transaction
transaction = new DefaultTransaction("create");
}
@Override
public void write(Chunk<? extends MyType> chunk) {
// Write chunk incrementally
// Do NOT accumulate in memory
}
@AfterStep
public ExitStatus afterStep(StepExecution stepExecution) {
transaction.commit(); // Commit all chunks
transaction.close();
return ExitStatus.COMPLETED;
}
}
```
### JobParameters and StepExecutionContext
**Pass data between steps** using `StepExecutionContext`:
```java
// Step 1: Store data
stepExecution.getExecutionContext().putString("geometryType", "ST_Polygon");
// Step 2: Retrieve data
@BeforeStep
public void beforeStep(StepExecution stepExecution) {
String geomType = stepExecution.getJobExecution()
.getExecutionContext()
.getString("geometryType");
}
```
**Job-level parameters** from command line:
```java
// ConverterCommandLineRunner.buildJobParameters()
JobParametersBuilder builder = new JobParametersBuilder();
builder.addString("inferenceId", converterProperties.getInferenceId());
builder.addLong("timestamp", System.currentTimeMillis()); // Ensures uniqueness
```
### Partitioning Pattern (Map ID Processing)
The `generateMapIdFilesStep` uses partitioning but runs **sequentially** to avoid DB connection pool exhaustion:
```java
@Bean
public Step generateMapIdFilesStep(...) {
return new StepBuilder("generateMapIdFilesStep", jobRepository)
.partitioner("mapIdWorker", partitioner)
.step(mapIdWorkerStep)
.taskExecutor(new SyncTaskExecutor()) // SEQUENTIAL execution
.build();
}
```
For parallel execution in future (requires connection pool tuning):
```java
.taskExecutor(new SimpleAsyncTaskExecutor())
.gridSize(4) // 4 concurrent workers
```
### GeoServer REST API Integration
GeoServer operations use `RestTemplate` with custom error handling:
```java
// GeoServerRegistrationService.java
try {
restTemplate.exchange(url, HttpMethod.PUT, entity, String.class);
} catch (HttpClientErrorException e) {
if (e.getStatusCode() == HttpStatus.NOT_FOUND) {
// Handle workspace not found
}
}
```
Always check workspace existence before layer registration.
### Testing Considerations
- **Unit tests**: Mock `JdbcTemplate`, `DataSource` for repository tests
- **Integration tests**: Use `@SpringBatchTest` with embedded H2 database
- **GeoTools**: Use `MemoryDataStore` for shapefile writer tests
- **Current state**: Limited test coverage (focus on critical path validation)
Refer to `claudedocs/SPRING_BATCH_MIGRATION.md` for detailed batch architecture documentation.

View File

@@ -111,7 +111,7 @@ java -jar build/libs/shp-exporter.jar \
java -jar build/libs/shp-exporter.jar \
--batch \
--converter.inference-id=test22 \
--converter.inference-id=test009 \
--converter.batch-ids[0]=111 \
--converter.batch-ids[1]=114 \
--converter.batch-ids[2]=162 \

View File

@@ -1,6 +1,7 @@
package com.kamco.makesample.batch.tasklet;
import com.kamco.makesample.service.GeoServerRegistrationService;
import java.io.File;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.StepContribution;
@@ -16,6 +17,9 @@ import org.springframework.stereotype.Component;
*
* <p>기존 GeoServerRegistrationService를 재사용하여 shapefile을 GeoServer에 등록
*
* <p>자동 선택 로직: - 파일 크기 < 100MB: REST API 업로드 (uploadShapefileZip) - 파일 크기 >= 100MB: 파일 경로 참조
* (registerShapefileByPath)
*
* <p>Conditional execution: geoserver.enabled=false 이면 skip
*/
@Component
@@ -24,6 +28,8 @@ public class GeoServerRegistrationTasklet implements Tasklet {
private static final Logger log = LoggerFactory.getLogger(GeoServerRegistrationTasklet.class);
private static final long FILE_SIZE_THRESHOLD = 100 * 1024 * 1024; // 100MB
private final GeoServerRegistrationService geoServerService;
@Value("#{jobParameters['geoserver.enabled'] ?: false}")
@@ -62,8 +68,29 @@ public class GeoServerRegistrationTasklet implements Tasklet {
throw new IllegalStateException("ZIP file path not available for GeoServer registration");
}
// 기존 GeoServerRegistrationService 재사용
geoServerService.uploadShapefileZip(zipPath, layerName);
// Check file size to determine registration method
File zipFile = new File(zipPath);
long fileSize = zipFile.length();
long fileSizeMB = fileSize / 1024 / 1024;
log.info("ZIP file size: {} bytes ({} MB)", fileSize, fileSizeMB);
//
// if (fileSize < FILE_SIZE_THRESHOLD) {
// // Small file: Use REST API upload
// log.info("Using REST API upload method (file size < 100MB)");
// geoServerService.uploadShapefileZip(zipPath, layerName);
// } else {
// // Large file: Use file path reference
// log.info(
// "Using file path reference method (file size >= 100MB, {} MB recommended for large"
// + " files)",
// fileSizeMB);
// log.info(
// "GeoServer will read the file from: {} (ensure GeoServer has file system access)",
// zipPath);
// geoServerService.registerShapefileByPath(zipPath, layerName);
// }
geoServerService.registerShapefileByPath(zipPath, layerName);
log.info("GeoServer registration completed successfully for layer: {}", layerName);

View File

@@ -190,35 +190,56 @@ public class ConverterCommandLineRunner implements CommandLineRunner {
return;
}
String filePath = firstOption(appArgs, "upload-shp");
String layerName = firstOption(appArgs, "layer");
// Check for file path reference method (recommended for large files)
String filePathReference = firstOption(appArgs, "register-by-path");
boolean usePathReference = filePathReference != null && !filePathReference.isBlank();
if (filePath == null || filePath.isBlank()) {
log.info("No upload requested. Use --upload-shp option to upload a shapefile.");
// Check for upload method (traditional, file size limited)
String uploadFilePath = firstOption(appArgs, "upload-shp");
boolean useUpload = uploadFilePath != null && !uploadFilePath.isBlank();
if (!usePathReference && !useUpload) {
log.info("No upload or registration requested.");
printUsage();
return;
}
if (usePathReference && useUpload) {
log.error("Cannot use both --upload-shp and --register-by-path at the same time.");
log.error("Choose one method:");
log.error(" --upload-shp: Upload file content via REST API (< 100MB)");
log.error(" --register-by-path: Reference file path (500MB+)");
System.exit(1);
}
String filePath = usePathReference ? filePathReference : uploadFilePath;
String layerName = firstOption(appArgs, "layer");
if (layerName == null || layerName.isBlank()) {
String fileName = Paths.get(filePath).getFileName().toString();
layerName = fileName.replaceAll("(?i)\\.(zip|shp)$", ""); // 대소문자도 처리
}
log.info("========================================");
log.info("Shapefile Upload to GeoServer");
log.info("Shapefile {} to GeoServer", usePathReference ? "Registration" : "Upload");
log.info("========================================");
log.info("Method: {}", usePathReference ? "File Path Reference" : "REST API Upload");
log.info("Input File: {}", filePath);
log.info("Layer Name: {}", layerName);
log.info("========================================");
try {
if (usePathReference) {
geoServerService.registerShapefileByPath(filePath, layerName);
} else {
geoServerService.uploadShapefileZip(filePath, layerName);
}
log.info("========================================");
log.info("Upload completed successfully!");
log.info("{} completed successfully!", usePathReference ? "Registration" : "Upload");
log.info("========================================");
} catch (Exception e) {
log.error("========================================");
log.error("Upload failed: {}", e.getMessage(), e);
log.error("{} failed: {}", usePathReference ? "Registration" : "Upload", e.getMessage(), e);
log.error("========================================");
throw e;
}
@@ -263,21 +284,44 @@ public class ConverterCommandLineRunner implements CommandLineRunner {
System.out.println();
System.out.println("Options:");
System.out.println(
" --upload-shp <file-path> Upload shapefile to GeoServer (.shp or .zip)");
" --upload-shp <file-path> Upload shapefile via REST API (< 100MB)");
System.out.println(
" --register-by-path <file-path> Register shapefile using file path (500MB+)");
System.out.println(
" --layer <layer-name> Specify layer name (optional, defaults to filename)");
System.out.println(" --help, -h Show this help message");
System.out.println();
System.out.println("Examples:");
System.out.println(" # Upload ZIP file directly");
System.out.println(" java -jar shp-exporter.jar --upload-shp /path/to/shapefile.zip");
System.out.println("GeoServer Registration Methods:");
System.out.println();
System.out.println(" # Upload .shp file (will auto-create ZIP with related files)");
System.out.println(" 1. REST API Upload (--upload-shp):");
System.out.println(" - Uploads file content to GeoServer via HTTP");
System.out.println(" - File size limit: < 100MB (HTTP payload limit)");
System.out.println(" - Use for small to medium files");
System.out.println();
System.out.println(" 2. File Path Reference (--register-by-path):");
System.out.println(" - GeoServer reads file from its local file system");
System.out.println(" - No file size limit (supports 500MB ~ 2GB+)");
System.out.println(" - Requirements:");
System.out.println(" * GeoServer must have file system access to the path");
System.out.println(" * Path must be absolute (e.g., /data/model_output/...)");
System.out.println(" * File must be readable by GeoServer user");
System.out.println();
System.out.println("Examples:");
System.out.println();
System.out.println(" # Small file (< 100MB): Upload via REST API");
System.out.println(" java -jar shp-exporter.jar --upload-shp /path/to/small_file.zip");
System.out.println();
System.out.println(" # Large file (500MB+): Register by file path");
System.out.println(
" java -jar shp-exporter.jar --register-by-path"
+ " /data/model_output/export/inference_id/merge/large_file.zip");
System.out.println();
System.out.println(" # With custom layer name");
System.out.println(
" java -jar shp-exporter.jar --register-by-path /path/to/file.zip --layer my_layer");
System.out.println();
System.out.println(" # Auto-create ZIP from .shp file (upload method)");
System.out.println(" java -jar shp-exporter.jar --upload-shp /path/to/shapefile.shp");
System.out.println();
System.out.println(" # Specify custom layer name");
System.out.println(
" java -jar shp-exporter.jar --upload-shp /path/to/shapefile.shp --layer my_layer");
System.out.println();
}
}

View File

@@ -38,12 +38,21 @@ public class GeoServerRegistrationService {
this.properties = properties;
}
/**
* Register shapefile to GeoServer by uploading file content (REST API)
*
* <p>LIMITATION: File size limited by HTTP request size (typically < 100MB) Use
* registerShapefileByPath() for larger files (500MB+)
*
* @param filePath Path to shapefile (.shp or .zip)
* @param layerName GeoServer layer name
*/
public void uploadShapefileZip(String filePath, String layerName) {
String zipFilePath = filePath;
boolean tempZipCreated = false;
try {
log.info("Starting shapefile upload to GeoServer");
log.info("Starting shapefile upload to GeoServer (REST API upload)");
log.info("Input file: {}", filePath);
log.info("Layer name: {}", layerName);
log.info("Workspace: {}", properties.getWorkspace());
@@ -59,6 +68,18 @@ public class GeoServerRegistrationService {
log.info("Temporary ZIP created: {}", zipFilePath);
}
// Check file size and warn if too large
Path path = Paths.get(zipFilePath);
long fileSize = Files.size(path);
log.info("ZIP file size: {} bytes ({} MB)", fileSize, fileSize / 1024 / 1024);
if (fileSize > 100 * 1024 * 1024) { // 100MB
log.warn(
"WARNING: File size ({} MB) may exceed HTTP upload limits. Consider using"
+ " registerShapefileByPath() for files > 100MB",
fileSize / 1024 / 1024);
}
// Check if layer exists and handle overwrite
if (properties.isOverwriteExisting() && layerExists(layerName)) {
log.info("Layer '{}' already exists. Deleting...", layerName);
@@ -66,9 +87,7 @@ public class GeoServerRegistrationService {
}
// Read ZIP file
Path path = Paths.get(zipFilePath);
byte[] zipData = Files.readAllBytes(path);
log.info("ZIP file size: {} bytes", zipData.length);
// Upload to GeoServer
String url =
@@ -102,6 +121,24 @@ public class GeoServerRegistrationService {
"GeoServer upload failed. Status: {}, Response: {}",
e.getStatusCode(),
e.getResponseBodyAsString());
// Provide helpful message for 413 Payload Too Large
if (e.getStatusCode() == HttpStatus.PAYLOAD_TOO_LARGE) {
log.error("");
log.error("========================================");
log.error("ERROR: File size exceeds GeoServer upload limit (HTTP 413)");
log.error("");
log.error("Solution: Use file path reference method instead:");
log.error(" 1. Copy shapefile to GeoServer data directory");
log.error(" 2. Use registerShapefileByPath() method");
log.error("");
log.error("Or increase GeoServer upload limits:");
log.error(" - Tomcat: maxPostSize in server.xml");
log.error(" - Nginx: client_max_body_size");
log.error("========================================");
log.error("");
}
throw new RuntimeException("GeoServer upload failed", e);
} catch (Exception e) {
log.error("Unexpected error during shapefile upload", e);
@@ -119,6 +156,118 @@ public class GeoServerRegistrationService {
}
}
/**
 * Registers a shapefile with GeoServer by absolute file path instead of uploading its content
 * (intended for large files, 500MB+).
 *
 * <p>This method does NOT transfer file bytes. It sends a {@code file://} URL to GeoServer's
 * REST {@code external.shp} endpoint, which tells GeoServer to read the data directly from its
 * own file system. (The {@code file.shp} endpoint is different: it expects the shapefile BYTES
 * in the request body, so a URL sent there would be misinterpreted as file content.)
 *
 * <p>Requirements:
 *
 * <ul>
 *   <li>GeoServer must have file system access to the path (shared volume or same host)
 *   <li>The path must be absolute and readable by the GeoServer process user
 * </ul>
 *
 * @param absoluteFilePath absolute path to a {@code .shp} or {@code .zip} on the GeoServer server
 * @param layerName GeoServer layer / datastore name
 * @throws IllegalArgumentException if the path or layer name is invalid
 * @throws RuntimeException if the GeoServer REST call fails
 */
public void registerShapefileByPath(String absoluteFilePath, String layerName) {
  try {
    log.info("Starting shapefile registration to GeoServer (file path reference)");
    log.info("Input file path: {}", absoluteFilePath);
    log.info("Layer name: {}", layerName);
    log.info("Workspace: {}", properties.getWorkspace());

    // Validate inputs before touching the network.
    if (absoluteFilePath == null || absoluteFilePath.trim().isEmpty()) {
      throw new IllegalArgumentException("File path cannot be empty");
    }
    if (layerName == null || layerName.trim().isEmpty()) {
      throw new IllegalArgumentException("Layer name cannot be empty");
    }

    // Verify the file locally. NOTE(review): this check assumes the exporter and GeoServer
    // share a file system (same host or shared mount); if GeoServer runs elsewhere, a file
    // that exists here may still be unreachable for GeoServer -- confirm deployment layout.
    File file = new File(absoluteFilePath);
    if (!file.exists()) {
      throw new IllegalArgumentException("File does not exist: " + absoluteFilePath);
    }
    if (!file.isAbsolute()) {
      throw new IllegalArgumentException("File path must be absolute: " + absoluteFilePath);
    }

    String lowerPath = absoluteFilePath.toLowerCase();
    if (!lowerPath.endsWith(".zip") && !lowerPath.endsWith(".shp")) {
      throw new IllegalArgumentException("File must be a .zip or .shp file: " + absoluteFilePath);
    }

    log.info("File size: {} MB", file.length() / 1024 / 1024);

    // Check if layer exists and handle overwrite.
    if (properties.isOverwriteExisting() && layerExists(layerName)) {
      log.info("Layer '{}' already exists. Deleting...", layerName);
      deleteLayer(layerName);
    }

    // Construct file:// URL ("file://" + "/abs/path" yields the standard file:///abs/path form).
    String fileUrl = "file://" + absoluteFilePath;
    log.info("Using file URL: {}", fileUrl);

    // GeoServer REST API endpoint for referencing EXISTING server-side data. The body of a
    // PUT to external.shp is the file URL/path; file.shp would expect raw shapefile bytes.
    String url =
        String.format(
            "%s/rest/workspaces/%s/datastores/%s/external.shp?configure=all",
            properties.getBaseUrl(), properties.getWorkspace(), layerName);

    HttpHeaders headers = createHeaders();
    headers.setContentType(MediaType.TEXT_PLAIN);

    // Send the file:// URL as the request body.
    HttpEntity<String> request = new HttpEntity<>(fileUrl, headers);

    log.info("Registering shapefile to GeoServer: {}", url);
    ResponseEntity<String> response =
        restTemplate.exchange(url, HttpMethod.PUT, request, String.class);

    if (response.getStatusCode() == HttpStatus.CREATED
        || response.getStatusCode() == HttpStatus.OK) {
      log.info("Shapefile registered successfully to GeoServer");
      log.info(
          "Layer '{}' is now available in workspace '{}'", layerName, properties.getWorkspace());
      log.info("GeoServer will read data from: {}", absoluteFilePath);
    } else {
      log.warn("Unexpected response status: {}", response.getStatusCode());
    }
  } catch (HttpClientErrorException e) {
    log.error(
        "GeoServer registration failed. Status: {}, Response: {}",
        e.getStatusCode(),
        e.getResponseBodyAsString());

    // A 500 from this endpoint usually means GeoServer itself could not open the path.
    if (e.getStatusCode() == HttpStatus.INTERNAL_SERVER_ERROR) {
      log.error("");
      log.error("========================================");
      log.error("ERROR: GeoServer cannot access the file path");
      log.error("");
      log.error("Possible causes:");
      log.error("  1. File path is not accessible from GeoServer server");
      log.error("  2. GeoServer user lacks read permissions");
      log.error("  3. File path format is incorrect (must be absolute path)");
      log.error("");
      log.error("Solutions:");
      log.error("  1. Verify GeoServer has file system access to: {}", absoluteFilePath);
      log.error("  2. Check file permissions (chmod 644 or similar)");
      log.error("  3. Ensure path is absolute and correctly formatted");
      log.error("========================================");
      log.error("");
    }
    throw new RuntimeException("GeoServer registration failed", e);
  } catch (Exception e) {
    log.error("Unexpected error during shapefile registration", e);
    throw new RuntimeException("Shapefile registration failed", e);
  }
}
private void validateInputs(String filePath, String layerName) {
if (filePath == null || filePath.trim().isEmpty()) {
throw new IllegalArgumentException("File path cannot be empty");