Merge pull request 'feat/training_260303' (#157) from feat/training_260303 into develop
Reviewed-on: #157
This commit was merged in pull request #157.
This commit is contained in:
@@ -5,8 +5,8 @@ import java.util.List;
|
|||||||
|
|
||||||
public class MonitorDto {
|
public class MonitorDto {
|
||||||
|
|
||||||
public int cpu; // 30초 평균 (%)
|
public int cpu; // 30초 평균 (%)
|
||||||
public String memory; // "3.2/16GB"
|
public String memory; // "3.2/16GB"
|
||||||
public List<Gpu> gpus = new ArrayList<>();
|
public List<Gpu> gpus = new ArrayList<>();
|
||||||
|
|
||||||
public static class Gpu {
|
public static class Gpu {
|
||||||
|
|||||||
@@ -76,7 +76,8 @@ public class GpuDmonReader {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
Thread.sleep(5000); // 5초 후에 시작
|
Thread.sleep(5000); // 5초 후에 시작
|
||||||
} catch (InterruptedException ignored) {}
|
} catch (InterruptedException ignored) {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,16 +87,12 @@ public class GpuDmonReader {
|
|||||||
private void runDmon() throws Exception {
|
private void runDmon() throws Exception {
|
||||||
|
|
||||||
// GPU utilization만 출력 (-s u)
|
// GPU utilization만 출력 (-s u)
|
||||||
ProcessBuilder pb = new ProcessBuilder(
|
ProcessBuilder pb = new ProcessBuilder("nvidia-smi", "dmon", "-s", "u");
|
||||||
"nvidia-smi", "dmon", "-s", "u"
|
|
||||||
);
|
|
||||||
|
|
||||||
process = pb.start();
|
process = pb.start();
|
||||||
|
|
||||||
// dmon은 stdout으로 계속 데이터를 뿌림 (스트리밍)
|
// dmon은 stdout으로 계속 데이터를 뿌림 (스트리밍)
|
||||||
try (BufferedReader br = new BufferedReader(
|
try (BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
|
||||||
new InputStreamReader(process.getInputStream()))
|
|
||||||
) {
|
|
||||||
String line;
|
String line;
|
||||||
|
|
||||||
while ((line = br.readLine()) != null) {
|
while ((line = br.readLine()) != null) {
|
||||||
@@ -144,7 +141,8 @@ public class GpuDmonReader {
|
|||||||
if (process != null && process.isAlive()) {
|
if (process != null && process.isAlive()) {
|
||||||
process.destroy();
|
process.destroy();
|
||||||
}
|
}
|
||||||
} catch (Exception ignored) {}
|
} catch (Exception ignored) {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isNvidiaAvailable() {
|
private boolean isNvidiaAvailable() {
|
||||||
|
|||||||
@@ -68,9 +68,7 @@ public class SystemMonitorService {
|
|||||||
int index = entry.getKey();
|
int index = entry.getKey();
|
||||||
int util = entry.getValue();
|
int util = entry.getValue();
|
||||||
|
|
||||||
gpuHistory
|
gpuHistory.computeIfAbsent(index, k -> new ArrayDeque<>()).add(util);
|
||||||
.computeIfAbsent(index, k -> new ArrayDeque<>())
|
|
||||||
.add(util);
|
|
||||||
|
|
||||||
Deque<Integer> q = gpuHistory.get(index);
|
Deque<Integer> q = gpuHistory.get(index);
|
||||||
if (q.size() > 30) q.poll();
|
if (q.size() > 30) q.poll();
|
||||||
@@ -185,10 +183,7 @@ public class SystemMonitorService {
|
|||||||
// =====================
|
// =====================
|
||||||
// CPU 평균 (30초)
|
// CPU 평균 (30초)
|
||||||
// =====================
|
// =====================
|
||||||
dto.cpu = (int) cpuHistory.stream()
|
dto.cpu = (int) cpuHistory.stream().mapToDouble(Double::doubleValue).average().orElse(0);
|
||||||
.mapToDouble(Double::doubleValue)
|
|
||||||
.average()
|
|
||||||
.orElse(0);
|
|
||||||
|
|
||||||
// =====================
|
// =====================
|
||||||
// Memory (현재값)
|
// Memory (현재값)
|
||||||
@@ -204,9 +199,7 @@ public class SystemMonitorService {
|
|||||||
|
|
||||||
Deque<Integer> q = gpuHistory.get(index);
|
Deque<Integer> q = gpuHistory.get(index);
|
||||||
|
|
||||||
int avg = (int) (q == null ? 0 :
|
int avg = (int) (q == null ? 0 : q.stream().mapToInt(i -> i).average().orElse(0));
|
||||||
q.stream().mapToInt(i -> i).average().orElse(0)
|
|
||||||
);
|
|
||||||
|
|
||||||
dto.gpus.add(new MonitorDto.Gpu(index, avg));
|
dto.gpus.add(new MonitorDto.Gpu(index, avg));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -220,17 +220,17 @@ public class ModelTrainMngApiController {
|
|||||||
|
|
||||||
@Operation(summary = "학습서버 시스템 사용율 조회", description = "cpu, gpu, memory 사용율 조회")
|
@Operation(summary = "학습서버 시스템 사용율 조회", description = "cpu, gpu, memory 사용율 조회")
|
||||||
@ApiResponses(
|
@ApiResponses(
|
||||||
value = {
|
value = {
|
||||||
@ApiResponse(
|
@ApiResponse(
|
||||||
responseCode = "200",
|
responseCode = "200",
|
||||||
description = "검색 성공",
|
description = "검색 성공",
|
||||||
content =
|
content =
|
||||||
@Content(
|
@Content(
|
||||||
mediaType = "application/json",
|
mediaType = "application/json",
|
||||||
schema = @Schema(implementation = Long.class))),
|
schema = @Schema(implementation = Long.class))),
|
||||||
@ApiResponse(responseCode = "400", description = "잘못된 검색 조건", content = @Content),
|
@ApiResponse(responseCode = "400", description = "잘못된 검색 조건", content = @Content),
|
||||||
@ApiResponse(responseCode = "500", description = "서버 오류", content = @Content)
|
@ApiResponse(responseCode = "500", description = "서버 오류", content = @Content)
|
||||||
})
|
})
|
||||||
@GetMapping("/monitor")
|
@GetMapping("/monitor")
|
||||||
public ApiResponseDto<MonitorDto> getSystem() throws IOException {
|
public ApiResponseDto<MonitorDto> getSystem() throws IOException {
|
||||||
return ApiResponseDto.ok(systemMonitorService.get());
|
return ApiResponseDto.ok(systemMonitorService.get());
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ member:
|
|||||||
init_password: kamco1234!
|
init_password: kamco1234!
|
||||||
|
|
||||||
swagger:
|
swagger:
|
||||||
local-port: 9080
|
local-port: 8080
|
||||||
|
|
||||||
file:
|
file:
|
||||||
sync-root-dir: /app/original-images/
|
sync-root-dir: /app/original-images/
|
||||||
|
|||||||
Reference in New Issue
Block a user