[KC-103] Fix the inference execution batch

2026-01-14 15:24:14 +09:00
parent 326591c4bd
commit 4d52e2a97a
5 changed files with 499 additions and 308 deletions


@@ -11,7 +11,6 @@ import com.kamco.cd.kamcoback.inference.dto.InferenceProgressDto;
import com.kamco.cd.kamcoback.inference.dto.InferenceResultDto.SaveInferenceAiDto;
import com.kamco.cd.kamcoback.inference.dto.InferenceResultDto.Status;
import com.kamco.cd.kamcoback.inference.dto.InferenceSendDto;
import com.kamco.cd.kamcoback.inference.dto.InferenceSendDto.pred_requests_areas;
import com.kamco.cd.kamcoback.postgres.core.InferenceResultCoreService;
import com.kamco.cd.kamcoback.scheduler.dto.JobStatusDto;
import jakarta.transaction.Transactional;
@@ -48,137 +47,247 @@ public class MapSheetInferenceJobService {
@Value("${inference.url}")
private String inferenceUrl;
/** Inference progress batch, runs every 1 minute */
/**
* Inference progress batch, runs every 1 minute
*/
@Scheduled(fixedDelay = 60_000)
@Transactional
public void runBatch() {
if ("local".equalsIgnoreCase(profile)) {
if (isLocalProfile()) {
return;
}
try {
InferenceBatchSheet batchSheet =
inferenceResultCoreService.getInferenceResultByStatus(Status.IN_PROGRESS.getId());
if (batchSheet == null) {
// Look up the in-progress batch
InferenceBatchSheet sheet = findInProgressSheet();
if (sheet == null) {
return;
}
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
headers.setAccept(List.of(MediaType.APPLICATION_JSON));
Long batchId = 0L;
if (batchSheet.getM3BatchId() != null) {
batchId = batchSheet.getM3BatchId();
} else if (batchSheet.getM2BatchId() != null) {
batchId = batchSheet.getM2BatchId();
} else if (batchSheet.getM1BatchId() != null) {
batchId = batchSheet.getM1BatchId();
}
if (batchId == 0L) {
// Resolve the batch id
Long batchId = resolveBatchId(sheet);
if (batchId == null || batchId == 0L) {
return;
}
String url = batchUrl + "/" + batchId;
ExternalCallResult<String> result =
externalHttpClient.call(url, HttpMethod.GET, null, headers, String.class);
int status = result.statusCode();
if (status < 200 || status >= 300) {
// Fetch the inference job status
JobStatusDto job = fetchJobStatus(batchId);
if (job == null) {
return;
}
String json = result.body();
JobStatusDto dto = objectMapper.readValue(json, JobStatusDto.class);
int totalJobs = dto.getTotalJobs();
int completedJobs = dto.getCompletedJobs();
int failedJobs = dto.getFailedJobs();
// Completed when completed + failed equals the total
String inferStatus = setStatus(totalJobs, completedJobs, failedJobs);
if ("COMPLETED".equals(inferStatus)) {
String type = batchSheet.getRunningModelType();
if (type.equals("M1")) {
// When M1 is done, start M2
startInference(
batchSheet.getId(), batchSheet.getUuid(), "M2", batchSheet.getM2ModelUuid());
// End time
updateProcessingEndTimeByModel(batchSheet.getUuid(), ZonedDateTime.now(), "M1");
} else if (type.equals("M2")) {
// When M2 is done, start M3
startInference(
batchSheet.getId(), batchSheet.getUuid(), "M3", batchSheet.getM3ModelUuid());
// End time
updateProcessingEndTimeByModel(batchSheet.getUuid(), ZonedDateTime.now(), "M2");
} else if (type.equals("M3")) {
// Completed
SaveInferenceAiDto saveInferenceAiDto = new SaveInferenceAiDto();
saveInferenceAiDto.setUuid(batchSheet.getUuid());
saveInferenceAiDto.setStatus(Status.END.getId());
saveInferenceAiDto.setInferEndDttm(ZonedDateTime.now());
saveInferenceAiDto.setType(type);
inferenceResultCoreService.update(saveInferenceAiDto);
// End time
updateProcessingEndTimeByModel(batchSheet.getUuid(), ZonedDateTime.now(), "M3");
}
if (isCompleted(job)) {
// Handle completion
onCompleted(sheet, job);
} else {
SaveInferenceAiDto saveInferenceAiDto = new SaveInferenceAiDto();
saveInferenceAiDto.setUuid(batchSheet.getUuid());
saveInferenceAiDto.setStatus(Status.IN_PROGRESS.getId());
saveInferenceAiDto.setDetectEndCnt((long) (completedJobs + failedJobs));
inferenceResultCoreService.update(saveInferenceAiDto);
// Handle the in-progress state
onProcessing(sheet, job);
}
} catch (JsonProcessingException e) {
Thread.currentThread().interrupt();
log.error("배치 중 인터럽트 발생", e);
// JSON 파싱 오류는 interrupt 대상 아님
log.error("배치 중 JSON 파싱 오류", e);
} catch (Exception e) {
log.error("배치 처리 중 예외", e);
}
}
private void startInference(Long id, UUID uuid, String type, UUID modelUuid) {
/**
* Look up the in-progress batch.
*
* @return the in-progress batch sheet, or null if none exists
*/
private InferenceBatchSheet findInProgressSheet() {
return inferenceResultCoreService.getInferenceResultByStatus(Status.IN_PROGRESS.getId());
}
InferenceProgressDto progressDto =
inferenceResultCoreService.getInferenceAiResultById(id, type, modelUuid);
/**
* Resolve the batch id to poll.
*
* @param sheet the in-progress batch sheet
* @return the batch id (M3 > M2 > M1), or 0 when none is set
*/
private Long resolveBatchId(InferenceBatchSheet sheet) {
// M3 > M2 > M1
if (sheet.getM3BatchId() != null) {
return sheet.getM3BatchId();
}
if (sheet.getM2BatchId() != null) {
return sheet.getM2BatchId();
}
if (sheet.getM1BatchId() != null) {
return sheet.getM1BatchId();
}
return 0L;
}
String inferenceType = "";
/**
* Fetch the inference job status.
*
* @param batchId the batch id to query
* @return the parsed job status, or null when the call is not successful
* @throws JsonProcessingException when the response body cannot be parsed
*/
private JobStatusDto fetchJobStatus(Long batchId) throws JsonProcessingException {
String url = batchUrl + "/" + batchId;
if (type.equals("M1")) {
inferenceType = "G1";
} else if (type.equals("M2")) {
inferenceType = "G2";
} else if (type.equals("M3")) {
inferenceType = "G3";
ExternalCallResult<String> result =
externalHttpClient.call(url, HttpMethod.GET, null, jsonHeaders(), String.class);
int status = result.statusCode();
if (status < 200 || status >= 300) {
return null;
}
pred_requests_areas predRequestsAreas = new pred_requests_areas();
return objectMapper.readValue(result.body(), JobStatusDto.class);
}
private HttpHeaders jsonHeaders() {
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
headers.setAccept(List.of(MediaType.APPLICATION_JSON));
return headers;
}
/**
* Decide whether the batch is completed.
*
* @param dto the job status
* @return true when completed + failed jobs reach the total job count
*/
private boolean isCompleted(JobStatusDto dto) {
return dto.getTotalJobs() <= (dto.getCompletedJobs() + dto.getFailedJobs());
}
/**
* Handle completion.
*
* @param sheet the in-progress batch sheet
* @param job the job status
*/
private void onCompleted(InferenceBatchSheet sheet, JobStatusDto job) {
String currentType = sheet.getRunningModelType();
ZonedDateTime now = ZonedDateTime.now();
// Update the end time of the current model
updateProcessingEndTimeByModel(job, sheet.getUuid(), now, currentType);
// If M3, the whole run ends
if ("M3".equals(currentType)) {
endAll(sheet, now);
return;
}
// Run the next model (M1 -> M2, M2 -> M3)
String nextType = nextModelType(currentType);
UUID nextModelUuid = resolveModelUuid(sheet, nextType);
// Start inference for the next model
startInference(sheet.getId(), sheet.getUuid(), nextType, nextModelUuid);
}
/**
* Update when the whole inference run ends.
*
* @param sheet the in-progress batch sheet
* @param now the end time
*/
private void endAll(InferenceBatchSheet sheet, ZonedDateTime now) {
SaveInferenceAiDto save = new SaveInferenceAiDto();
save.setUuid(sheet.getUuid());
save.setStatus(Status.END.getId());
save.setInferEndDttm(now);
save.setType("M3"); // 마지막 모델 기준
inferenceResultCoreService.update(save);
}
/**
* Determine the next model type to run.
*
* @param currentType the currently running model type
* @return the next model type (M1 -> M2, M2 -> M3)
*/
private String nextModelType(String currentType) {
if ("M1".equals(currentType)) {
return "M2";
}
if ("M2".equals(currentType)) {
return "M3";
}
throw new IllegalArgumentException("Unknown runningModelType: " + currentType);
}
/**
* Get the model UUID for the given type.
*
* @param sheet the in-progress batch sheet
* @param type the model type
* @return the model UUID
*/
private UUID resolveModelUuid(InferenceBatchSheet sheet, String type) {
if ("M1".equals(type)) {
return sheet.getM1ModelUuid();
}
if ("M2".equals(type)) {
return sheet.getM2ModelUuid();
}
if ("M3".equals(type)) {
return sheet.getM3ModelUuid();
}
throw new IllegalArgumentException("Unknown type: " + type);
}
/**
* Handle the in-progress state.
*
* @param sheet the in-progress batch sheet
* @param job the job status
*/
private void onProcessing(InferenceBatchSheet sheet, JobStatusDto job) {
SaveInferenceAiDto save = new SaveInferenceAiDto();
save.setUuid(sheet.getUuid());
save.setStatus(Status.IN_PROGRESS.getId());
save.setPendingJobs(job.getPendingJobs());
save.setRunningJobs(job.getRunningJobs());
save.setCompletedJobs(job.getCompletedJobs());
save.setFailedJobs(job.getFailedJobs());
inferenceResultCoreService.update(save);
}
/**
* Start inference for the next model.
*
* @param id the batch sheet id
* @param uuid the batch sheet uuid
* @param type the model type to run
* @param modelUuid the model UUID
*/
private void startInference(Long id, UUID uuid, String type, UUID modelUuid) {
// Look up the parameters for the inference API call
InferenceProgressDto progressDto =
inferenceResultCoreService.getInferenceAiResultById(id, modelUuid);
// Convert to the model name the AI server expects
String inferenceType = modelToInferenceType(type);
InferenceSendDto.pred_requests_areas predRequestsAreas = new InferenceSendDto.pred_requests_areas();
predRequestsAreas.setInput1_year(progressDto.getPred_requests_areas().getInput1_year());
predRequestsAreas.setInput2_year(progressDto.getPred_requests_areas().getInput2_year());
predRequestsAreas.setInput1_scene_path(
progressDto.getPred_requests_areas().getInput1_scene_path());
predRequestsAreas.setInput2_scene_path(
progressDto.getPred_requests_areas().getInput2_scene_path());
predRequestsAreas.setInput1_scene_path(progressDto.getPred_requests_areas().getInput1_scene_path());
predRequestsAreas.setInput2_scene_path(progressDto.getPred_requests_areas().getInput2_scene_path());
InferenceSendDto m = new InferenceSendDto();
m.setPred_requests_areas(predRequestsAreas);
m.setModel_version(progressDto.getModelVersion());
m.setCd_model_path(progressDto.getCdModelPath() + "/" + progressDto.getCdModelFileName());
m.setCd_model_config(
progressDto.getCdModelConfigPath() + "/" + progressDto.getCdModelConfigFileName());
m.setCls_model_path(
progressDto.getCdModelClsPath() + "/" + progressDto.getCdModelClsFileName());
m.setCd_model_config(progressDto.getCdModelConfigPath() + "/" + progressDto.getCdModelConfigFileName());
m.setCls_model_path(progressDto.getCdModelClsPath() + "/" + progressDto.getCdModelClsFileName());
m.setCls_model_version(progressDto.getClsModelVersion());
m.setCd_model_type(inferenceType);
m.setPriority(progressDto.getPriority());
// Run the next inference model
// Call the inference API
Long batchId = ensureAccepted(m);
SaveInferenceAiDto saveInferenceAiDto = new SaveInferenceAiDto();
@@ -191,71 +300,111 @@ public class MapSheetInferenceJobService {
inferenceResultCoreService.update(saveInferenceAiDto);
}
/**
* Map the model type to the name the AI server expects.
*
* @param type the model type
* @return the inference type (M1 -> G1, M2 -> G2, M3 -> G3)
*/
private String modelToInferenceType(String type) {
if ("M1".equals(type)) {
return "G1";
}
if ("M2".equals(type)) {
return "G2";
}
if ("M3".equals(type)) {
return "G3";
}
throw new IllegalArgumentException("Unknown type: " + type);
}
/**
* Call the inference API and ensure the request was accepted.
*
* @param dto the inference request payload
* @return the batch id returned by the API
*/
private Long ensureAccepted(InferenceSendDto dto) {
log.info("dto null? {}", dto == null);
ObjectMapper om = new ObjectMapper();
try {
log.info("dto json={}", om.writeValueAsString(dto));
} catch (Exception e) {
log.error(e.getMessage());
if (dto == null) {
log.warn("not InferenceSendDto dto");
throw new CustomApiException("BAD_REQUEST", HttpStatus.BAD_REQUEST);
}
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
headers.setAccept(List.of(MediaType.APPLICATION_JSON));
// 1) Request logging
try {
log.debug("Inference request dto={}", objectMapper.writeValueAsString(dto));
} catch (JsonProcessingException e) {
log.warn("Failed to serialize inference dto", e);
}
// TODO remove later
// 2) Temporary handling for the local environment
if ("local".equals(profile)) {
if (dto.getPred_requests_areas() == null) {
dto.setPred_requests_areas(new InferenceSendDto.pred_requests_areas());
throw new IllegalStateException("pred_requests_areas is null");
}
dto.getPred_requests_areas().setInput1_scene_path("/kamco-nfs/requests/2023_local.geojson");
dto.getPred_requests_areas().setInput2_scene_path("/kamco-nfs/requests/2024_local.geojson");
}
// 3) HTTP call
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
headers.setAccept(List.of(MediaType.APPLICATION_JSON));
ExternalCallResult<String> result =
externalHttpClient.call(inferenceUrl, HttpMethod.POST, dto, headers, String.class);
int status = result.statusCode();
String body = result.body();
if (status < 200 || status >= 300) {
if (result.statusCode() < 200 || result.statusCode() >= 300) {
log.error("Inference API failed. status={}, body={}", result.statusCode(), result.body());
throw new CustomApiException("BAD_GATEWAY", HttpStatus.BAD_GATEWAY);
}
Long batchId = 0L;
// 4) Parse the response
try {
List<Map<String, Object>> list =
om.readValue(body, new TypeReference<List<Map<String, Object>>>() {});
objectMapper.readValue(result.body(), new TypeReference<>() {
});
Integer batchIdInt = (Integer) list.get(0).get("batch_id");
batchId = batchIdInt.longValue();
if (list.isEmpty()) {
throw new IllegalStateException("Inference response is empty");
}
} catch (Exception e) {
log.error(e.getMessage());
}
Object batchIdObj = list.get(0).get("batch_id");
if (batchIdObj == null) {
throw new IllegalStateException("batch_id not found in response");
}
return batchId;
return Long.valueOf(batchIdObj.toString());
} catch (Exception e) {
log.error("Failed to parse inference response. body={}", result.body(), e);
throw new CustomApiException("INVALID_INFERENCE_RESPONSE", HttpStatus.BAD_GATEWAY);
}
}
private void updateProcessingEndTimeByModel(UUID uuid, ZonedDateTime dateTime, String type) {
/**
* Check the active profile.
*
* @return true when running with the local profile
*/
private boolean isLocalProfile() {
return "local".equalsIgnoreCase(profile);
}
/**
* Update the per-model inference end time.
*/
private void updateProcessingEndTimeByModel(JobStatusDto dto, UUID uuid, ZonedDateTime dateTime, String type) {
SaveInferenceAiDto saveInferenceAiDto = new SaveInferenceAiDto();
saveInferenceAiDto.setUuid(uuid);
saveInferenceAiDto.setUpdateUid(0L);
saveInferenceAiDto.setModelEndDttm(dateTime);
saveInferenceAiDto.setType(type);
saveInferenceAiDto.setPendingJobs(dto.getPendingJobs());
saveInferenceAiDto.setRunningJobs(dto.getRunningJobs());
saveInferenceAiDto.setCompletedJobs(dto.getCompletedJobs());
saveInferenceAiDto.setFailedJobs(dto.getFailedJobs());
inferenceResultCoreService.update(saveInferenceAiDto);
}
private String setStatus(int totalJobs, int completedJobs, int failedJobs) {
if (totalJobs <= (completedJobs + failedJobs)) {
return "COMPLETED";
}
return "PROCESSING";
}
}
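
For reference, the decision rules this commit factors out of runBatch can be exercised in isolation: a batch counts as completed once completed plus failed jobs cover the total, the model chain advances M1 -> M2 -> M3, and each model type is mapped to the G1/G2/G3 name the AI server expects. The sketch below is illustrative only; JobCounts and the main method are hypothetical stand-ins for JobStatusDto and the scheduler, not part of the service.

// Illustrative sketch only: mirrors the completion, chaining, and naming rules from this commit.
public class InferenceBatchDecisionSketch {

    // Hypothetical stand-in for JobStatusDto.
    record JobCounts(int totalJobs, int completedJobs, int failedJobs) {}

    // Same rule as isCompleted(): done once completed + failed reach the total.
    static boolean isCompleted(JobCounts job) {
        return job.totalJobs() <= job.completedJobs() + job.failedJobs();
    }

    // Same chain as nextModelType(): M1 -> M2 -> M3 (M3 ends the run).
    static String nextModelType(String currentType) {
        return switch (currentType) {
            case "M1" -> "M2";
            case "M2" -> "M3";
            default -> throw new IllegalArgumentException("Unknown runningModelType: " + currentType);
        };
    }

    // Same mapping as modelToInferenceType(): M1/M2/M3 -> G1/G2/G3.
    static String modelToInferenceType(String type) {
        return switch (type) {
            case "M1" -> "G1";
            case "M2" -> "G2";
            case "M3" -> "G3";
            default -> throw new IllegalArgumentException("Unknown type: " + type);
        };
    }

    public static void main(String[] args) {
        JobCounts job = new JobCounts(10, 8, 2);          // 8 completed + 2 failed = 10 total
        if (isCompleted(job)) {
            String current = "M1";                        // e.g. the sheet's runningModelType
            String next = nextModelType(current);         // -> "M2"
            String aiName = modelToInferenceType(next);   // -> "G2"
            System.out.println("start " + next + " as " + aiName);
        } else {
            System.out.println("still processing: "
                + (job.completedJobs() + job.failedJobs()) + "/" + job.totalJobs());
        }
    }
}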
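
Similarly, a minimal sketch of the response parsing that ensureAccepted now guards: the inference API is assumed to answer with a JSON array whose first element carries a batch_id (for example [{"batch_id": 123}]); the sample payload and helper below are illustrative, not the actual API contract.

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.List;
import java.util.Map;

// Illustrative only: parses an assumed response shape of [{"batch_id": 123}].
public class BatchIdParseSketch {

    static Long parseBatchId(String body) throws Exception {
        ObjectMapper objectMapper = new ObjectMapper();
        List<Map<String, Object>> list =
            objectMapper.readValue(body, new TypeReference<List<Map<String, Object>>>() {});
        if (list.isEmpty()) {
            throw new IllegalStateException("Inference response is empty");
        }
        Object batchIdObj = list.get(0).get("batch_id");
        if (batchIdObj == null) {
            throw new IllegalStateException("batch_id not found in response");
        }
        // Works whether Jackson mapped the number to Integer or Long.
        return Long.valueOf(batchIdObj.toString());
    }

    public static void main(String[] args) throws Exception {
        String sample = "[{\"batch_id\": 123}]";   // hypothetical sample payload
        System.out.println(parseBatchId(sample));  // prints 123
    }
}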