Merge pull request 'Add failure message saving' (#297) from feat/infer_dev_260107 into develop

Reviewed-on: https://kamco.gitea.gs.dabeeo.com/dabeeo/kamco-dabeeo-backoffice/pulls/297
2026-01-20 20:48:31 +09:00
8 changed files with 415 additions and 83 deletions

InferenceResultCoreService.java

@@ -30,6 +30,7 @@ import com.kamco.cd.kamcoback.postgres.repository.Inference.MapSheetAnalDataInfe
 import com.kamco.cd.kamcoback.postgres.repository.Inference.MapSheetLearn5kRepository;
 import com.kamco.cd.kamcoback.postgres.repository.Inference.MapSheetLearnRepository;
 import com.kamco.cd.kamcoback.postgres.repository.scene.MapInkx5kRepository;
+import com.kamco.cd.kamcoback.scheduler.dto.JobStatusDto;
 import jakarta.persistence.EntityManager;
 import jakarta.persistence.EntityNotFoundException;
 import jakarta.validation.constraints.NotNull;
@@ -436,13 +437,25 @@ public class InferenceResultCoreService {
   /**
    * Save failure status per model and map sheet
    *
-   * @param uuid learn table uuid
-   * @param failMapIds list of failed map sheets
+   * @param uuid uuid
+   * @param jobDto job info
    * @param type model type
    */
   @Transactional
-  public void saveFail5k(UUID uuid, List<Long> failMapIds, String type) {
-    mapSheetLearn5kRepository.saveFail5k(uuid, failMapIds, type);
+  public void saveFail5k(UUID uuid, JobStatusDto jobDto, String type) {
+    mapSheetLearn5kRepository.saveFail5k(uuid, jobDto, type);
   }
+
+  /**
+   * Save the successful job id per model and map sheet
+   *
+   * @param uuid uuid
+   * @param jobDto job info
+   * @param type model type
+   */
+  @Transactional
+  public void saveJobId(UUID uuid, JobStatusDto jobDto, String type) {
+    mapSheetLearn5kRepository.saveJobId(uuid, jobDto, type);
+  }

   public Long getInferenceLearnIdByUuid(UUID uuid) {
@@ -486,4 +499,12 @@ public class InferenceResultCoreService {
   public List<String> getInferenceRunMapId(UUID uuid) {
     return mapSheetLearn5kRepository.getInferenceRunMapId(uuid);
   }
+
+  public List<Long> findFail5kList(UUID uuid, List<Long> failMapIds, String type) {
+    return mapSheetLearn5kRepository.findFail5kList(uuid, failMapIds, type);
+  }
+
+  public List<Long> findCompleted5kList(UUID uuid, List<Long> completedIds, String type) {
+    return mapSheetLearn5kRepository.findCompleted5kList(uuid, completedIds, type);
+  }
 }

MapSheetLearn5kEntity.java

@@ -63,4 +63,22 @@ public class MapSheetLearn5kEntity {
   @Column(name = "is_m3_fail")
   private Boolean isM3Fail = false;
+
+  @Column(name = "m1_error_message")
+  private String m1ErrorMessage;
+
+  @Column(name = "m2_error_message")
+  private String m2ErrorMessage;
+
+  @Column(name = "m3_error_message")
+  private String m3ErrorMessage;
+
+  @Column(name = "m1_job_id")
+  private Long m1JobId;
+
+  @Column(name = "m2_job_id")
+  private Long m2JobId;
+
+  @Column(name = "m3_job_id")
+  private Long m3JobId;
 }

MapSheetLearn5kRepositoryCustom.java

@@ -1,11 +1,18 @@
 package com.kamco.cd.kamcoback.postgres.repository.Inference;

+import com.kamco.cd.kamcoback.scheduler.dto.JobStatusDto;
 import java.util.List;
 import java.util.UUID;

 public interface MapSheetLearn5kRepositoryCustom {
-  void saveFail5k(UUID uuid, List<Long> failMapIds, String type);
+  void saveFail5k(UUID uuid, JobStatusDto jobDto, String type);
+
+  void saveJobId(UUID uuid, JobStatusDto jobDto, String type);
+
+  List<Long> findFail5kList(UUID uuid, List<Long> failIds, String type);
+
+  List<Long> findCompleted5kList(UUID uuid, List<Long> completedIds, String type);

   List<String> getInferenceRunMapId(UUID uuid);
 }

MapSheetLearn5kRepositoryImpl.java

@@ -4,8 +4,13 @@ import static com.kamco.cd.kamcoback.postgres.entity.QMapInkx5kEntity.mapInkx5kE
 import static com.kamco.cd.kamcoback.postgres.entity.QMapSheetLearn5kEntity.mapSheetLearn5kEntity;
 import static com.kamco.cd.kamcoback.postgres.entity.QMapSheetLearnEntity.mapSheetLearnEntity;

+import com.kamco.cd.kamcoback.scheduler.dto.JobStatusDto;
 import com.querydsl.core.BooleanBuilder;
 import com.querydsl.core.types.dsl.BooleanPath;
+import com.querydsl.core.types.dsl.Expressions;
+import com.querydsl.core.types.dsl.NumberPath;
+import com.querydsl.core.types.dsl.StringPath;
+import com.querydsl.jpa.JPAExpressions;
 import com.querydsl.jpa.impl.JPAQueryFactory;
 import java.util.List;
 import java.util.UUID;
@@ -19,49 +24,183 @@ public class MapSheetLearn5kRepositoryImpl implements MapSheetLearn5kRepositoryC
   private final JPAQueryFactory queryFactory;

   @Override
-  public void saveFail5k(UUID uuid, List<Long> failMapIds, String type) {
-    if (failMapIds == null || failMapIds.isEmpty()) {
+  public void saveFail5k(UUID uuid, JobStatusDto jobDto, String type) {
+    if (uuid == null || jobDto == null || type == null) {
       return;
     }
-
-    Long id =
-        queryFactory
-            .select(mapSheetLearnEntity.id)
-            .from(mapSheetLearnEntity)
-            .where(mapSheetLearnEntity.uuid.eq(uuid))
-            .fetchOne();
-    if (id == null) {
-      return;
-    }
-
-    BooleanPath target;
+
+    final BooleanPath failPath;
+    final NumberPath<Long> jobIdPath;
+    final StringPath errorMsgPath;
     switch (type) {
-      case "M1":
-        target = mapSheetLearn5kEntity.isM1Fail;
-        break;
-      case "M2":
-        target = mapSheetLearn5kEntity.isM2Fail;
-        break;
-      case "M3":
-        target = mapSheetLearn5kEntity.isM3Fail;
-        break;
-      default:
+      case "M1" -> {
+        failPath = mapSheetLearn5kEntity.isM1Fail;
+        jobIdPath = mapSheetLearn5kEntity.m1JobId;
+        errorMsgPath = mapSheetLearn5kEntity.m1ErrorMessage;
+      }
+      case "M2" -> {
+        failPath = mapSheetLearn5kEntity.isM2Fail;
+        jobIdPath = mapSheetLearn5kEntity.m2JobId;
+        errorMsgPath = mapSheetLearn5kEntity.m2ErrorMessage;
+      }
+      case "M3" -> {
+        failPath = mapSheetLearn5kEntity.isM3Fail;
+        jobIdPath = mapSheetLearn5kEntity.m3JobId;
+        errorMsgPath = mapSheetLearn5kEntity.m3ErrorMessage;
+      }
+      default -> {
         return;
+      }
     }
+
+    var learnIdSubQuery =
+        JPAExpressions.select(mapSheetLearnEntity.id)
+            .from(mapSheetLearnEntity)
+            .where(mapSheetLearnEntity.uuid.eq(uuid));

     queryFactory
         .update(mapSheetLearn5kEntity)
-        .set(target, true)
+        .set(failPath, Boolean.TRUE)
+        .set(jobIdPath, jobDto.getId())
+        .set(errorMsgPath, jobDto.getErrorMessage())
         .where(
             mapSheetLearn5kEntity
                 .learn
                 .id
-                .eq(id)
-                .and(mapSheetLearn5kEntity.mapSheetNum.in(failMapIds)))
+                .eq(learnIdSubQuery)
+                .and(mapSheetLearn5kEntity.mapSheetNum.eq(Long.valueOf(jobDto.getSceneId()))))
         .execute();
   }
+
+  @Override
+  public void saveJobId(UUID uuid, JobStatusDto jobDto, String type) {
+    if (uuid == null || jobDto == null || type == null) {
+      return;
+    }
+
+    final BooleanPath failPath;
+    final NumberPath<Long> jobIdPath;
+    switch (type) {
+      case "M1" -> {
+        failPath = mapSheetLearn5kEntity.isM1Fail;
+        jobIdPath = mapSheetLearn5kEntity.m1JobId;
+      }
+      case "M2" -> {
+        failPath = mapSheetLearn5kEntity.isM2Fail;
+        jobIdPath = mapSheetLearn5kEntity.m2JobId;
+      }
+      case "M3" -> {
+        failPath = mapSheetLearn5kEntity.isM3Fail;
+        jobIdPath = mapSheetLearn5kEntity.m3JobId;
+      }
+      default -> {
+        return;
+      }
+    }
+
+    var learnIdSubQuery =
+        JPAExpressions.select(mapSheetLearnEntity.id)
+            .from(mapSheetLearnEntity)
+            .where(mapSheetLearnEntity.uuid.eq(uuid));
+
+    queryFactory
+        .update(mapSheetLearn5kEntity)
+        .set(failPath, Boolean.FALSE)
+        .set(jobIdPath, jobDto.getId())
+        .where(
+            mapSheetLearn5kEntity
+                .learn
+                .id
+                .eq(learnIdSubQuery)
+                .and(mapSheetLearn5kEntity.mapSheetNum.eq(Long.valueOf(jobDto.getSceneId()))))
+        .execute();
+  }
+
+  @Override
+  public List<Long> findFail5kList(UUID uuid, List<Long> failIds, String type) {
+    if (uuid == null || failIds == null || failIds.isEmpty() || type == null) {
+      return List.of();
+    }
+
+    NumberPath<Long> jobIdPath;
+    BooleanPath failPath;
+    switch (type) {
+      case "M1" -> {
+        jobIdPath = mapSheetLearn5kEntity.m1JobId;
+        failPath = mapSheetLearn5kEntity.isM1Fail;
+      }
+      case "M2" -> {
+        jobIdPath = mapSheetLearn5kEntity.m2JobId;
+        failPath = mapSheetLearn5kEntity.isM2Fail;
+      }
+      case "M3" -> {
+        jobIdPath = mapSheetLearn5kEntity.m3JobId;
+        failPath = mapSheetLearn5kEntity.isM3Fail;
+      }
+      default -> {
+        return List.of();
+      }
+    }
+
+    // Job ids among failIds whose failure is already recorded; callers skip these.
+    return queryFactory
+        .select(jobIdPath)
+        .from(mapSheetLearn5kEntity)
+        .join(mapSheetLearn5kEntity.learn, mapSheetLearnEntity)
+        .where(
+            mapSheetLearnEntity
+                .uuid
+                .eq(uuid)
+                .and(failPath.isTrue())
+                .and(jobIdPath.in(failIds)))
+        .fetch();
+  }
+
+  @Override
+  public List<Long> findCompleted5kList(UUID uuid, List<Long> completedIds, String type) {
+    if (uuid == null || completedIds == null || completedIds.isEmpty() || type == null) {
+      return List.of();
+    }
+
+    NumberPath<Long> jobIdPath;
+    switch (type) {
+      case "M1" -> {
+        jobIdPath = mapSheetLearn5kEntity.m1JobId;
+      }
+      case "M2" -> {
+        jobIdPath = mapSheetLearn5kEntity.m2JobId;
+      }
+      case "M3" -> {
+        jobIdPath = mapSheetLearn5kEntity.m3JobId;
+      }
+      default -> {
+        return List.of();
+      }
+    }
+
+    // Job ids among completedIds that are already stored; callers skip these.
+    return queryFactory
+        .select(jobIdPath)
+        .from(mapSheetLearn5kEntity)
+        .join(mapSheetLearn5kEntity.learn, mapSheetLearnEntity)
+        .where(mapSheetLearnEntity.uuid.eq(uuid).and(jobIdPath.in(completedIds)))
+        .fetch();
+  }

   @Override
   public List<String> getInferenceRunMapId(UUID uuid) {
     return queryFactory

BatchStatusDto.java

@@ -0,0 +1,64 @@
+package com.kamco.cd.kamcoback.scheduler.dto;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.time.ZonedDateTime;
+import java.util.List;
+import lombok.Data;
+
+@Data
+public class BatchStatusDto {
+  private Long id;
+
+  @JsonProperty("created_at")
+  private ZonedDateTime createdAt;
+
+  @JsonProperty("input1_year")
+  private Integer input1Year;
+
+  @JsonProperty("input2_year")
+  private Integer input2Year;
+
+  @JsonProperty("total_jobs")
+  private Integer totalJobs;
+
+  @JsonProperty("pending_jobs")
+  private Integer pendingJobs;
+
+  @JsonProperty("running_jobs")
+  private Integer runningJobs;
+
+  @JsonProperty("completed_jobs")
+  private Integer completedJobs;
+
+  @JsonProperty("failed_jobs")
+  private Integer failedJobs;
+
+  private String status;
+
+  private List<Object> jobs;
+
+  @JsonProperty("completed_ids")
+  private List<String> completedIds;
+
+  @JsonProperty("processing_ids")
+  private List<String> processingIds;
+
+  @JsonProperty("queued_ids")
+  private List<String> queuedIds;
+
+  @JsonProperty("failed_ids")
+  private List<String> failedIds;
+
+  @JsonProperty("completed_scene_ids")
+  private List<String> completedSceneIds;
+
+  @JsonProperty("processing_scene_ids")
+  private List<String> processingSceneIds;
+
+  @JsonProperty("queued_scene_ids")
+  private List<String> queuedSceneIds;
+
+  @JsonProperty("failed_scene_ids")
+  private List<String> failedSceneIds;
+}
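
Note: the scheduler below binds the batch-status response body straight onto this DTO via objectMapper.readValue(result.body(), BatchStatusDto.class). A minimal standalone sketch of that binding, using a hypothetical payload shaped after the @JsonProperty names above (the real endpoint response is not shown in this PR), and assuming a JavaTimeModule-enabled mapper for the ZonedDateTime field:

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;

class BatchStatusDtoExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical payload; field names follow the @JsonProperty annotations above.
    String body =
        """
        {
          "id": 17,
          "created_at": "2026-01-20T20:48:31+09:00",
          "total_jobs": 3,
          "completed_jobs": 2,
          "failed_jobs": 1,
          "status": "RUNNING",
          "completed_ids": ["101", "102"],
          "failed_ids": ["103"]
        }
        """;

    // JavaTimeModule registration is an assumption; the service's ObjectMapper
    // configuration is not shown in this diff, but the module is required for
    // binding created_at into ZonedDateTime.
    ObjectMapper mapper = new ObjectMapper().registerModule(new JavaTimeModule());
    BatchStatusDto dto = mapper.readValue(body, BatchStatusDto.class);

    // Same completion criterion the service uses: total <= completed + failed.
    boolean completed = dto.getTotalJobs() <= (dto.getCompletedJobs() + dto.getFailedJobs());
    System.out.println("batch " + dto.getId() + " completed=" + completed);
  }
}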

JobStatusDto.java

@@ -2,51 +2,38 @@ package com.kamco.cd.kamcoback.scheduler.dto;

 import com.fasterxml.jackson.annotation.JsonProperty;
 import java.time.ZonedDateTime;
-import java.util.List;
-import lombok.Data;
+import lombok.Getter;
+import lombok.Setter;

-@Data
+@Getter
+@Setter
 public class JobStatusDto {
   private Long id;

-  @JsonProperty("created_at")
-  private ZonedDateTime createdAt;
+  @JsonProperty("scene_id")
+  private String sceneId;

-  @JsonProperty("input1_year")
-  private Integer input1Year;
-
-  @JsonProperty("input2_year")
-  private Integer input2Year;
-
-  @JsonProperty("total_jobs")
-  private Integer totalJobs;
-
-  @JsonProperty("pending_jobs")
-  private Integer pendingJobs;
-
-  @JsonProperty("running_jobs")
-  private Integer runningJobs;
-
-  @JsonProperty("completed_jobs")
-  private Integer completedJobs;
-
-  @JsonProperty("failed_jobs")
-  private Integer failedJobs;
+  @JsonProperty("model_name")
+  private String modelName;

   private String status;

-  private List<Object> jobs;
+  @JsonProperty("batch_id")
+  private Long batchId;

-  @JsonProperty("completed_ids")
-  private List<String> completedIds;
+  @JsonProperty("created_at")
+  private ZonedDateTime createdAt;

-  @JsonProperty("processing_ids")
-  private List<String> processingIds;
+  @JsonProperty("started_at")
+  private ZonedDateTime startedAt;

-  @JsonProperty("queued_ids")
-  private List<String> queuedIds;
+  @JsonProperty("finished_at")
+  private ZonedDateTime finishedAt;

-  @JsonProperty("failed_ids")
-  private List<String> failedIds;
+  @JsonProperty("worker_id")
+  private Long workerId;
+
+  @JsonProperty("error_message")
+  private String errorMessage;
 }

MapSheetInferenceJobService.java

@@ -4,7 +4,6 @@ import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.kamco.cd.kamcoback.common.exception.CustomApiException;
-import com.kamco.cd.kamcoback.common.service.ExternalJarRunner;
 import com.kamco.cd.kamcoback.config.resttemplate.ExternalHttpClient;
 import com.kamco.cd.kamcoback.config.resttemplate.ExternalHttpClient.ExternalCallResult;
 import com.kamco.cd.kamcoback.inference.dto.InferenceDetailDto.InferenceBatchSheet;
@@ -14,12 +13,15 @@ import com.kamco.cd.kamcoback.inference.dto.InferenceResultDto.Status;
 import com.kamco.cd.kamcoback.inference.dto.InferenceResultsTestingDto;
 import com.kamco.cd.kamcoback.inference.dto.InferenceSendDto;
 import com.kamco.cd.kamcoback.postgres.core.InferenceResultCoreService;
+import com.kamco.cd.kamcoback.scheduler.dto.BatchStatusDto;
 import com.kamco.cd.kamcoback.scheduler.dto.JobStatusDto;
 import java.time.ZonedDateTime;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.Set;
 import java.util.UUID;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.log4j.Log4j2;
@@ -41,7 +43,6 @@ public class MapSheetInferenceJobService {
   private final ExternalHttpClient externalHttpClient;
   private final ObjectMapper objectMapper;
-  private final ExternalJarRunner externalJarRunner;

   @Value("${inference.batch-url}")
   private String batchUrl;
@@ -79,7 +80,7 @@ public class MapSheetInferenceJobService {
     }

     // Fetch inference run status info
-    JobStatusDto job = fetchJobStatus(batchId);
+    BatchStatusDto job = batchesStatus(batchId);
     if (job == null) {
       // TODO If inference entries are not in END status and the inference API returns 404, update their status to END
       // End the learning status
@@ -139,7 +140,7 @@
    * @return batch status info, or null if unavailable
    * @throws JsonProcessingException
    */
-  private JobStatusDto fetchJobStatus(Long batchId) throws JsonProcessingException {
+  private BatchStatusDto batchesStatus(Long batchId) throws JsonProcessingException {
     String url = batchUrl + "/" + batchId;

     ExternalCallResult<String> result =
@@ -154,6 +155,31 @@
       return null;
     }

-    return objectMapper.readValue(result.body(), JobStatusDto.class);
+    return objectMapper.readValue(result.body(), BatchStatusDto.class);
   }
+
+  /**
+   * Fetch job info
+   *
+   * @param jobId job id
+   * @return job status info, or null if unavailable
+   * @throws JsonProcessingException
+   */
+  private JobStatusDto jobStatus(Long jobId) throws JsonProcessingException {
+    String url = inferenceUrl + "/" + jobId;
+
+    ExternalCallResult<String> result =
+        externalHttpClient.call(url, HttpMethod.GET, null, jsonHeaders(), String.class);
+
+    int status = result.statusCode();
+    if (status == 404) {
+      log.info("Job not found. jobId={}", jobId);
+      return null;
+    }
+    if (status < 200 || status >= 300) {
+      return null;
+    }
+
+    return objectMapper.readValue(result.body(), JobStatusDto.class);
+  }
@@ -170,7 +196,7 @@
    * @param dto batch status info
    * @return true if all jobs have finished (completed or failed)
    */
-  private boolean isCompleted(JobStatusDto dto) {
+  private boolean isCompleted(BatchStatusDto dto) {
     return dto.getTotalJobs() <= (dto.getCompletedJobs() + dto.getFailedJobs());
   }
@@ -180,7 +206,7 @@
    * @param sheet inference batch sheet
    * @param job batch status info
    */
-  private void onCompleted(InferenceBatchSheet sheet, JobStatusDto job) {
+  private void onCompleted(InferenceBatchSheet sheet, BatchStatusDto job) {
     String currentType = sheet.getRunningModelType();
     ZonedDateTime now = ZonedDateTime.now();
@@ -288,7 +314,7 @@
    * @param sheet inference batch sheet
    * @param job batch status info
    */
-  private void onProcessing(InferenceBatchSheet sheet, JobStatusDto job) {
+  private void onProcessing(InferenceBatchSheet sheet, BatchStatusDto job) {
     SaveInferenceAiDto save = new SaveInferenceAiDto();
     save.setUuid(sheet.getUuid());
     save.setStatus(Status.IN_PROGRESS.getId());
@@ -298,6 +324,11 @@
     save.setFailedJobs(job.getFailedJobs());
     save.setType(sheet.getRunningModelType());
     inferenceResultCoreService.update(save);
+
+    // Save failure messages
+    saveFail5k(job, sheet.getUuid(), sheet.getRunningModelType());
+    // Save job ids for successful jobs
+    saveCompleted5k(job, sheet.getUuid(), sheet.getRunningModelType());
   }
   /**
@@ -444,7 +475,7 @@
   /** Update inference end time per model */
   private void updateProcessingEndTimeByModel(
-      JobStatusDto dto, UUID uuid, ZonedDateTime dateTime, String type) {
+      BatchStatusDto dto, UUID uuid, ZonedDateTime dateTime, String type) {
     SaveInferenceAiDto saveInferenceAiDto = new SaveInferenceAiDto();
     saveInferenceAiDto.setUuid(uuid);
     saveInferenceAiDto.setUpdateUid(0L);
@@ -456,13 +487,10 @@
     saveInferenceAiDto.setFailedJobs(dto.getFailedJobs());
     inferenceResultCoreService.update(saveInferenceAiDto);

-    List<Long> failedIds =
-        Optional.ofNullable(dto.getFailedIds()).orElse(List.of()).stream()
-            .map(Long::valueOf)
-            .toList();
-
-    // Update per-map-sheet failure status
-    inferenceResultCoreService.saveFail5k(uuid, failedIds, type);
+    // Save failure messages
+    saveFail5k(dto, uuid, type);
+    // Save job ids for successful jobs
+    saveCompleted5k(dto, uuid, type);
   }
   /**
@@ -477,4 +505,72 @@
     request.setUpdateUid(0L);
     inferenceResultCoreService.update(request);
   }
+
+  /**
+   * Update per-map-sheet failure status
+   *
+   * @param dto batch info
+   * @param uuid uuid
+   * @param type model type
+   */
+  private void saveFail5k(BatchStatusDto dto, UUID uuid, String type) {
+    List<Long> failedIds =
+        Optional.ofNullable(dto.getFailedIds()).orElse(List.of()).stream()
+            .map(Long::valueOf)
+            .toList();
+
+    // Skip failed job ids whose failure is already persisted
+    List<Long> jobIds = inferenceResultCoreService.findFail5kList(uuid, failedIds, type);
+    Set<Long> jobIdSet = new HashSet<>(jobIds);
+    List<Long> remainFailedIds = failedIds.stream().filter(id -> !jobIdSet.contains(id)).toList();
+    if (remainFailedIds.isEmpty()) {
+      return;
+    }
+
+    try {
+      for (Long failedId : remainFailedIds) {
+        JobStatusDto jobStatusDto = jobStatus(failedId);
+        inferenceResultCoreService.saveFail5k(uuid, jobStatusDto, type);
+      }
+    } catch (JsonProcessingException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Save per-map-sheet job ids
+   *
+   * @param dto batch info
+   * @param uuid uuid
+   * @param type model type
+   */
+  private void saveCompleted5k(BatchStatusDto dto, UUID uuid, String type) {
+    List<Long> completedIds =
+        Optional.ofNullable(dto.getCompletedIds()).orElse(List.of()).stream()
+            .map(Long::valueOf)
+            .toList();
+
+    // Skip completed job ids that are already persisted
+    List<Long> jobIds = inferenceResultCoreService.findCompleted5kList(uuid, completedIds, type);
+    Set<Long> jobIdSet = new HashSet<>(jobIds);
+    List<Long> remainIds = completedIds.stream().filter(id -> !jobIdSet.contains(id)).toList();
+    if (remainIds.isEmpty()) {
+      return;
+    }
+
+    try {
+      for (Long id : remainIds) {
+        JobStatusDto jobStatusDto = jobStatus(id);
+        inferenceResultCoreService.saveJobId(uuid, jobStatusDto, type);
+      }
+    } catch (JsonProcessingException e) {
+      throw new RuntimeException(e);
+    }
+  }
 }
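
Note: the dedup step shared by saveFail5k and saveCompleted5k above can be read in isolation: ids already persisted (as reported by findFail5kList/findCompleted5kList) are filtered out before any per-job HTTP lookups, so each jobStatus() call is made at most once per job and model. A minimal sketch of just that filtering, with hypothetical ids:

import java.util.HashSet;
import java.util.List;
import java.util.Set;

class DedupSketch {
  // Mirrors the filtering in saveFail5k/saveCompleted5k: drop ids already saved.
  static List<Long> remaining(List<Long> batchIds, List<Long> alreadySaved) {
    Set<Long> saved = new HashSet<>(alreadySaved);
    return batchIds.stream().filter(id -> !saved.contains(id)).toList();
  }

  public static void main(String[] args) {
    // Hypothetical ids: 103 was persisted on an earlier poll, so only 104 remains.
    System.out.println(remaining(List.of(103L, 104L), List.of(103L))); // prints [104]
  }
}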

ShpPipelineService.java

@@ -40,7 +40,7 @@ public class ShpPipelineService {
     } catch (Exception e) {
       log.error("SHP pipeline failed. inferenceId={}", inferenceId, e);
-      // TODO Add failure-status update logic if needed
+      // TODO Add failure-status update logic
     } finally {
       shpKeyLock.unlock(inferenceId);
     }