This commit is contained in:
2025-10-16 15:06:50 +09:00
parent 2fcca115d6
commit 230ea0890d
11 changed files with 1587 additions and 145 deletions

View File

@@ -0,0 +1,315 @@
"""
iDRAC Job Monitoring Service (Redfish 버전)
기존 Flask 앱의 backend/services/ 디렉토리에 추가하세요.
기존 idrac_jobs.py를 이 파일로 교체하거나 redfish_jobs.py로 저장하세요.
"""
import logging
logging.basicConfig(level=logging.DEBUG)
import ipaddress
import time
import logging
import os
from datetime import datetime, timezone
from typing import List, Dict, Any, Optional, Tuple
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from .redfish_client import RedfishClient, AuthenticationError, NotSupportedError
logger = logging.getLogger(__name__)
# ────────────────────────────────────────────────────────────
# 설정 (환경변수 또는 기본값)
# ────────────────────────────────────────────────────────────
# Runtime settings; each value can be overridden through the environment
# variable of the same name (the IP list path uses IDRAC_IP_LIST).
# NOTE(review): the fallback credentials are hard-coded in source - confirm
# this is acceptable for the target deployment.
IDRAC_USER = os.environ.get("IDRAC_USER", "root")
IDRAC_PASS = os.environ.get("IDRAC_PASS", "calvin")
# Numeric settings are parsed at import time; a non-integer value raises.
MAX_WORKERS = int(os.environ.get("MAX_WORKERS", "32"))
REDFISH_TIMEOUT = int(os.environ.get("REDFISH_TIMEOUT", "15"))
# Only the (case-insensitive) string "true" enables certificate verification.
VERIFY_SSL = os.environ.get("VERIFY_SSL", "False").lower() == "true"
IP_LIST_PATH = os.environ.get("IDRAC_IP_LIST", "data/server_list/idrac_ip_list.txt")
# ────────────────────────────────────────────────────────────
# IP 유효성 검증
# ────────────────────────────────────────────────────────────
def validate_ip(ip: str) -> bool:
    """Return True if *ip* is a syntactically valid IPv4 or IPv6 address.

    Surrounding whitespace is ignored. A value that is not a string (for
    example ``None``) is reported as invalid instead of raising.
    """
    if not isinstance(ip, str):
        return False
    try:
        ipaddress.ip_address(ip.strip())
    except ValueError:
        return False
    return True
def parse_ip_list(text: str) -> List[str]:
    """Extract the unique, valid IP addresses from free-form text.

    Addresses may be separated by whitespace, commas, semicolons or
    newlines. Everything from a ``#`` to the end of its line is a comment
    and is ignored, so ``10.0.0.1  # rack 3, old 10.0.0.9`` yields only
    ``10.0.0.1``.

    Args:
        text: Raw list contents; ``None`` or an empty string yields ``[]``.

    Returns:
        Valid addresses in first-seen order, without duplicates. Tokens that
        are not valid addresses are logged as warnings and skipped.
    """
    if not text:
        return []
    ips: List[str] = []
    seen = set()
    for raw_line in text.splitlines():
        # Drop the comment before splitting on separators, so that commas or
        # semicolons inside a comment cannot leak comment text into the list.
        content = raw_line.split("#", 1)[0]
        for part in content.replace(",", " ").replace(";", " ").split():
            if not validate_ip(part):
                logger.warning(f"Invalid IP address: {part}")
                continue
            if part not in seen:
                seen.add(part)
                ips.append(part)
    return ips
def load_ip_list(path: str = IP_LIST_PATH) -> List[str]:
    """Read the IP list file and return the addresses it contains.

    Args:
        path: Location of the list file; defaults to ``IP_LIST_PATH``.

    Returns:
        The addresses produced by ``parse_ip_list``. An empty list is
        returned (and the reason logged) when the file does not exist or
        any error occurs while reading or parsing it.
    """
    try:
        source = Path(path)
        if source.exists():
            with source.open("r", encoding="utf-8") as handle:
                text = handle.read()
            addresses = parse_ip_list(text)
            logger.info(f"Loaded {len(addresses)} IPs from {path}")
            return addresses
        logger.warning(f"IP list file not found: {path}")
        return []
    except Exception as exc:
        # Best effort: a broken list file must not take the caller down.
        logger.error(f"Failed to load IP list from {path}: {exc}")
        return []
# ────────────────────────────────────────────────────────────
# Job 상태 판별
# ────────────────────────────────────────────────────────────
# Lower-case substrings that mark a job as still in flight; is_active_status
# looks for them in the lower-cased job status and message.
ACTIVE_KEYWORDS = (
"running", "scheduled", "progress", "starting",
"queued", "pending", "preparing", "applying"
)
# Lower-case substrings that mark a job as finished, whether it succeeded or
# not; is_done_status looks for them in the lower-cased job status.
DONE_KEYWORDS = (
"completed", "success", "succeeded",
"failed", "error", "aborted",
"canceled", "cancelled"
)
def is_active_status(status: Optional[str], message: Optional[str] = None) -> bool:
    """Tell whether a job is still in progress.

    The status and the optional message are trimmed and lower-cased; the job
    counts as active when either of them contains any entry of
    ``ACTIVE_KEYWORDS`` as a substring. ``None`` is treated as an empty
    string.
    """
    status_text = (status or "").strip().lower()
    message_text = (message or "").strip().lower()
    for keyword in ACTIVE_KEYWORDS:
        if keyword in status_text or keyword in message_text:
            return True
    return False
def is_done_status(status: Optional[str]) -> bool:
    """Tell whether a job has finished, successfully or not.

    The status is trimmed and lower-cased; the job counts as done when it
    contains any entry of ``DONE_KEYWORDS`` as a substring. ``None`` is
    treated as an empty string.
    """
    status_text = (status or "").strip().lower()
    for keyword in DONE_KEYWORDS:
        if keyword in status_text:
            return True
    return False
# ────────────────────────────────────────────────────────────
# 날짜/시간 파싱
# ────────────────────────────────────────────────────────────
def parse_iso_datetime(dt_str: Optional[str]) -> Optional[float]:
    """Convert an ISO 8601 date/time string to a POSIX timestamp.

    Every ``Z`` in the input is rewritten to ``+00:00`` before the string is
    handed to ``datetime.fromisoformat``.

    Returns:
        Seconds since the epoch as a float, or ``None`` when the input is
        empty or cannot be parsed (the failure is logged at debug level).
    """
    if not dt_str:
        return None
    try:
        normalized = dt_str.replace("Z", "+00:00")
        parsed = datetime.fromisoformat(normalized)
        return parsed.timestamp()
    except Exception as exc:
        logger.debug(f"Failed to parse datetime '{dt_str}': {exc}")
        return None
def iso_now() -> str:
    """Return the current UTC time as an ISO 8601 string with a UTC offset."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
# ────────────────────────────────────────────────────────────
# LRU 캐시
# ────────────────────────────────────────────────────────────
class LRUJobCache:
    """Bounded job cache that evicts the least recently used entry first.

    Entries are keyed by ``(ip, job identifier)`` tuples and stored in an
    ``OrderedDict`` ordered from least to most recently used.
    """

    def __init__(self, max_size: int = 10000):
        """Create an empty cache holding at most *max_size* entries."""
        self.max_size = max_size
        self.cache: OrderedDict[Tuple[str, str], Dict[str, Any]] = OrderedDict()
        self.last_gc = time.time()

    def _make_key(self, ip: str, job: Dict[str, Any]) -> Tuple[str, str]:
        """Build the cache key for *job* on *ip*.

        The trimmed ``JID`` is used when present; otherwise the key falls
        back to ``NOJID::<trimmed Name>``.
        """
        job_id = (job.get("JID") or "").strip()
        if not job_id:
            job_name = (job.get("Name") or "").strip()
            return (ip, f"NOJID::{job_name}")
        return (ip, job_id)

    def get(self, key: Tuple[str, str]) -> Optional[Dict[str, Any]]:
        """Return the entry for *key* and mark it most recently used.

        Returns ``None`` when the key is not cached.
        """
        try:
            self.cache.move_to_end(key)
        except KeyError:
            return None
        return self.cache[key]

    def set(self, key: Tuple[str, str], value: Dict[str, Any]):
        """Store *value* under *key* as the most recently used entry.

        When the cache grows past ``max_size`` the least recently used
        entry is evicted.
        """
        self.cache[key] = value
        # No-op for a brand-new key, refreshes recency for an existing one.
        self.cache.move_to_end(key)
        if len(self.cache) > self.max_size:
            self.cache.popitem(last=False)

    def keys(self) -> List[Tuple[str, str]]:
        """Return a snapshot of all keys, least recently used first."""
        return list(self.cache)

    def pop(self, key: Tuple[str, str], default=None):
        """Remove *key* and return its entry, or *default* if absent."""
        return self.cache.pop(key, default)

    def clear_for_ips(self, current_ips: set):
        """Drop every entry whose IP is not in *current_ips*."""
        dropped = 0
        for cached_key in list(self.cache):
            if cached_key[0] in current_ips:
                continue
            del self.cache[cached_key]
            dropped += 1
        if dropped:
            logger.info(f"Cleared {dropped} cache entries for removed IPs")

    def gc(self, max_age_seconds: float):
        """Drop entries whose ``last_seen`` is older than *max_age_seconds*.

        Entries without a ``last_seen`` value count as timestamp 0. The time
        of this run is recorded in ``last_gc``.
        """
        started = time.time()
        oldest_allowed = started - max_age_seconds
        evicted = 0
        for cached_key, entry in list(self.cache.items()):
            if entry.get("last_seen", 0) < oldest_allowed:
                del self.cache[cached_key]
                evicted += 1
        if evicted:
            logger.info(f"Cache GC: removed {evicted} entries")
        self.last_gc = started
# ────────────────────────────────────────────────────────────
# Job 스캐너
# ────────────────────────────────────────────────────────────
def _scan_failure(ip: str, error: str) -> Dict[str, Any]:
    """Build the result dict reported for an IP whose scan did not succeed."""
    return {"ip": ip, "ok": False, "error": error, "jobs": []}


def scan_single_ip(ip: str) -> Dict[str, Any]:
    """Query the jobs of one iDRAC and report the outcome as a dict.

    This function never raises; every failure is converted into a result.

    Args:
        ip: Address of the iDRAC to query.

    Returns:
        On success ``{"ip": str, "ok": True, "jobs": List[Dict]}``.
        On failure ``{"ip": str, "ok": False, "error": str, "jobs": []}``.
    """
    # RedfishClient is built on `requests`, whose Timeout / ConnectionError
    # derive from RequestException (an IOError) and NOT from the builtin
    # TimeoutError / ConnectionError. They must be caught by name, otherwise
    # ordinary network failures are reported as "Unexpected error".
    # Imported locally because this module does not import requests itself.
    import requests

    if not validate_ip(ip):
        return _scan_failure(ip, "Invalid IP address")
    try:
        with RedfishClient(ip, IDRAC_USER, IDRAC_PASS, REDFISH_TIMEOUT, VERIFY_SSL) as client:
            jobs = client.get_jobs()
        return {"ip": ip, "ok": True, "jobs": jobs}
    except AuthenticationError:
        return _scan_failure(ip, "Authentication failed")
    except NotSupportedError:
        return _scan_failure(ip, "Redfish API not supported (old iDRAC?)")
    # Timeout is checked first: requests.ConnectTimeout is both a Timeout
    # and a ConnectionError and is best reported as a timeout.
    except (TimeoutError, requests.Timeout) as e:
        return _scan_failure(ip, f"Timeout: {str(e)}")
    except (ConnectionError, requests.ConnectionError) as e:
        return _scan_failure(ip, f"Connection failed: {str(e)}")
    except Exception as e:
        # Top-level boundary of a worker thread: log the traceback and turn
        # the failure into a result so one host cannot abort the whole scan.
        logger.exception(f"Unexpected error for {ip}")
        return _scan_failure(ip, f"Unexpected error: {str(e)[:100]}")
def _ip_sort_key(ip: Any) -> Tuple[int, int, str]:
    """Sort key ordering addresses numerically, IPv4 before IPv6.

    Values that are not valid addresses sort after all valid ones, ordered
    by their text.
    """
    text = str(ip)
    try:
        address = ipaddress.ip_address(text.strip())
    except ValueError:
        return (7, 0, text)  # 7 > 6: place unparsable values last
    return (address.version, int(address), text)


def scan_all(ips: List[str], method: str = "redfish", max_workers: int = MAX_WORKERS) -> List[Dict[str, Any]]:
    """Scan several iDRACs in parallel.

    Args:
        ips: Addresses to scan.
        method: Only used in the log line; kept for backward compatibility
            with callers that pass a scan method such as "racadm".
        max_workers: Upper bound on worker threads. The pool is never larger
            than the number of IPs, and values below 1 are raised to 1.
            ``None`` keeps the executor's own default.

    Returns:
        One ``scan_single_ip`` result per IP, sorted numerically by address
        (so 10.0.0.2 comes before 10.0.0.10).
    """
    if not ips:
        return []
    if max_workers is None:
        workers = None
    else:
        workers = max(1, min(max_workers, len(ips)))
    logger.info(f"Scanning {len(ips)} IPs with {workers} workers (method: {method})")
    # monotonic() is unaffected by wall-clock adjustments during the scan.
    start_time = time.monotonic()
    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [executor.submit(scan_single_ip, ip) for ip in ips]
        results = [future.result() for future in as_completed(futures)]
    elapsed = time.monotonic() - start_time
    logger.info(f"Scan completed in {elapsed:.2f}s")
    return sorted(results, key=lambda result: _ip_sort_key(result["ip"]))

View File

@@ -0,0 +1,241 @@
"""
Dell iDRAC Redfish API Client (수정 버전)
절대 경로와 상대 경로 모두 처리
"""
import requests
import urllib3
from typing import Dict, Any, Optional, List
import logging
from functools import wraps
import time
import os
# Silence urllib3's InsecureRequestWarning (emitted for unverified HTTPS).
# NOTE(review): this runs at import time and is not tied to a client's
# verify_ssl setting - confirm that suppressing it globally is intended.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def retry_on_failure(max_attempts: int = 2, delay: float = 2.0):
    """Decorator factory that retries a call on transient network errors.

    Only ``requests.Timeout`` and ``requests.ConnectionError`` trigger a
    retry; any other exception propagates immediately.

    Args:
        max_attempts: Total number of calls to make. Values below 1 are
            treated as 1, so the wrapped function always runs at least once.
        delay: Base wait in seconds. The wait grows linearly: ``delay``
            after the first failure, ``2 * delay`` after the second, ...

    Returns:
        A decorator. The wrapped function returns the first successful
        result, or re-raises the last network error once all attempts have
        failed.
    """
    attempts = max(1, max_attempts)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except (requests.Timeout, requests.ConnectionError) as e:
                    if attempt == attempts:
                        raise
                    wait = delay * attempt
                    # Log the wait that is actually applied, not the base delay.
                    logger.warning(f"Attempt {attempt} failed, retrying in {wait}s: {e}")
                    time.sleep(wait)
        return wrapper
    return decorator
class RedfishClient:
    """Redfish API client for reading jobs from a Dell iDRAC.

    One client wraps one ``requests.Session`` that uses HTTP basic auth
    against ``https://<ip>/redfish/v1``. Use it as a context manager so the
    session is closed when done.

    Failures surface as exceptions rather than as empty results:
    ``AuthenticationError`` for HTTP 401, ``NotSupportedError`` when the
    device offers no job endpoint, and the underlying ``requests`` exception
    for network errors or other HTTP errors.
    """

    # Job collections queried by get_jobs(), relative to /redfish/v1.
    STANDARD_JOBS_ENDPOINT = "/Managers/iDRAC.Embedded.1/Jobs"
    DELL_OEM_JOBS_ENDPOINT = "/Managers/iDRAC.Embedded.1/Oem/Dell/Jobs"

    # JobState value -> status label used in normalized jobs. States that
    # are not listed are passed through unchanged.
    _STATUS_MAP = {
        "New": "Scheduled",
        "Starting": "Starting",
        "Running": "Running",
        "Completed": "Completed",
        "Failed": "Failed",
        "CompletedWithErrors": "Completed with Errors",
        "Pending": "Pending",
        "Paused": "Paused",
        "Stopping": "Stopping",
        "Cancelled": "Cancelled",
        "Cancelling": "Cancelling",
    }

    def __init__(
        self,
        ip: str,
        username: str,
        password: str,
        timeout: int = 15,
        verify_ssl: bool = False
    ):
        """Prepare a session for the iDRAC at *ip*; no request is sent yet.

        Args:
            ip: Address of the iDRAC (IPv4 or IPv6 literal).
            username: Account used for HTTP basic auth.
            password: Password of that account.
            timeout: Per-request timeout in seconds.
            verify_ssl: Whether to verify the server certificate.
        """
        self.ip = ip
        # An IPv6 literal must be wrapped in brackets inside a URL.
        host = f"[{ip}]" if ip.count(":") >= 2 and not ip.startswith("[") else ip
        self.host_url = f"https://{host}"
        self.base_url = f"{self.host_url}/redfish/v1"
        self.timeout = timeout
        self.verify_ssl = verify_ssl
        self.session = requests.Session()
        self.session.auth = (username, password)
        self.session.verify = verify_ssl
        self.session.headers.update({
            "Content-Type": "application/json",
            "Accept": "application/json"
        })

    @retry_on_failure(max_attempts=2, delay=2.0)
    def get(self, endpoint: str) -> Dict[str, Any]:
        """Send a GET request and return the decoded JSON body.

        Args:
            endpoint: Either an absolute Redfish path (starting with
                ``/redfish/v1``, as found in ``@odata.id`` links) or a path
                relative to the service root (``/Managers/...``).

        Raises:
            AuthenticationError: The device answered HTTP 401.
            requests.HTTPError: Any other 4xx/5xx response.
            requests.Timeout, requests.ConnectionError: Network failure
                that persisted through the retry.
        """
        if endpoint.startswith('/redfish/v1'):
            url = f"{self.host_url}{endpoint}"
        else:
            # Tolerate a relative endpoint given without a leading slash.
            url = f"{self.base_url}/{endpoint.lstrip('/')}"
        logger.debug(f"GET {url}")
        response = self.session.get(url, timeout=self.timeout)
        if response.status_code == 401:
            raise AuthenticationError(f"{self.ip}: authentication failed (HTTP 401)")
        response.raise_for_status()
        return response.json()

    def get_jobs(self) -> List[Dict[str, Any]]:
        """Return all jobs known to the iDRAC in normalized form.

        Both the standard and the Dell OEM job collections are queried and
        their results merged. A failure of one endpoint is tolerated as long
        as the other one answers.

        Returns:
            Normalized jobs (see ``_normalize_job``) sorted by ``JID``.
            Jobs sharing a JID are reported once; jobs without a JID are
            all kept.

        Raises:
            AuthenticationError: The credentials were rejected.
            NotSupportedError: Neither job endpoint exists on the device.
            Exception: No endpoint answered; the first error is re-raised
                so that an unreachable host is not reported as "no jobs".
        """
        fetchers = (
            ("Standard", self._get_jobs_standard),
            ("Dell OEM", self._get_jobs_dell_oem),
        )
        jobs: List[Dict[str, Any]] = []
        answered = 0
        first_error: Optional[Exception] = None
        for label, fetch in fetchers:
            try:
                jobs.extend(fetch())
                answered += 1
            except AuthenticationError:
                # Wrong credentials fail on every endpoint; stop right away.
                raise
            except NotSupportedError:
                logger.debug(f"{self.ip}: {label} Jobs endpoint not available")
            except Exception as e:
                logger.warning(f"{self.ip}: {label} Jobs endpoint failed: {e}")
                if first_error is None:
                    first_error = e
        if not answered:
            if first_error is not None:
                raise first_error
            raise NotSupportedError(f"{self.ip}: no Redfish Jobs endpoint available")
        if not jobs:
            logger.info(f"{self.ip}: No jobs found")
            return []
        # The same job can appear in both collections: keep the first copy.
        seen_jids = set()
        unique_jobs = []
        for job in jobs:
            jid = job.get("JID") or ""
            if jid:
                if jid in seen_jids:
                    continue
                seen_jids.add(jid)
            unique_jobs.append(job)
        logger.info(f"{self.ip}: Retrieved {len(unique_jobs)} unique jobs")
        return sorted(unique_jobs, key=lambda x: x.get("JID") or "")

    def _fetch_job_collection(self, endpoint: str, job_label: str) -> List[Dict[str, Any]]:
        """Read one job collection and normalize each of its members.

        A member that cannot be fetched is logged and skipped.

        Raises:
            NotSupportedError: The collection endpoint answered HTTP 404.
            AuthenticationError: The credentials were rejected.
        """
        try:
            collection = self.get(endpoint)
        except requests.HTTPError as e:
            if e.response is not None and e.response.status_code == 404:
                raise NotSupportedError(f"{self.ip}: {endpoint} not available") from e
            raise
        jobs = []
        for member in collection.get("Members") or []:
            job_path = member.get("@odata.id", "")
            if not job_path:
                continue
            try:
                jobs.append(self._normalize_job(self.get(job_path)))
            except AuthenticationError:
                raise
            except Exception as e:
                logger.warning(f"{self.ip}: Failed to get {job_label} {job_path}: {e}")
        return jobs

    def _get_jobs_standard(self) -> List[Dict[str, Any]]:
        """Read the standard job collection.

        Raises ``NotSupportedError`` when the endpoint does not exist.
        """
        return self._fetch_job_collection(self.STANDARD_JOBS_ENDPOINT, "job")

    def _get_jobs_dell_oem(self) -> List[Dict[str, Any]]:
        """Read the Dell OEM job collection.

        Raises ``NotSupportedError`` when the endpoint does not exist.
        """
        return self._fetch_job_collection(self.DELL_OEM_JOBS_ENDPOINT, "Dell OEM job")

    def _normalize_job(self, job_data: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a raw Redfish job resource into the flat job dict.

        Missing or ``null`` fields become empty strings (``PercentComplete``
        becomes ``"0"``, ``JobState`` becomes ``"Unknown"``).

        Returns:
            A dict with the keys ``JID``, ``Name``, ``Status``,
            ``PercentComplete`` (as a string), ``Message``,
            ``ScheduledStartTime``, ``StartTime``, ``EndTime`` and
            ``LastUpdateTime`` (``EndTime`` if set, else ``StartTime``).
        """
        percent = job_data.get("PercentComplete")
        if percent is None:
            percent = 0
        job_state = job_data.get("JobState") or "Unknown"
        status = self._STATUS_MAP.get(job_state, job_state)
        # Prefer the first structured message; fall back to the flat field.
        message_text = ""
        messages = job_data.get("Messages")
        if isinstance(messages, list) and messages and isinstance(messages[0], dict):
            message_text = messages[0].get("Message") or ""
        if not message_text:
            message_text = job_data.get("Message") or ""
        start_time = job_data.get("StartTime") or ""
        end_time = job_data.get("EndTime") or ""
        return {
            "JID": job_data.get("Id") or "",
            "Name": job_data.get("Name") or "",
            "Status": status,
            "PercentComplete": str(percent),
            "Message": message_text,
            "ScheduledStartTime": job_data.get("ScheduledStartTime") or "",
            "StartTime": start_time,
            "EndTime": end_time,
            "LastUpdateTime": end_time or start_time,
        }

    def close(self):
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
# 커스텀 예외
class AuthenticationError(Exception):
    """Authentication failure."""
class NotSupportedError(Exception):
    """Requested feature is not supported."""