|
@@ -3,6 +3,7 @@ Host monitoring service for collecting system metrics.
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
import asyncio
|
|
import asyncio
|
|
|
|
|
+import time
|
|
|
from datetime import datetime, timedelta, timezone
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
|
|
|
|
import psutil
|
|
import psutil
|
|
@@ -20,53 +21,152 @@ class HostMonitor:
|
|
|
def __init__(self):
    """Create a monitor that has no baseline samples yet and is not running."""
    # Baselines consumed by collect_metrics() to turn cumulative counters into
    # per-second rates; all stay None until the first collection has stored them.
    self.previous_disk_io = self.previous_net_io = None
    self.previous_cpu_stats = self.previous_timestamp = None

    # Starts out False; nothing in this constructor flips it.
    self.running = False
|
|
|
|
|
|
|
|
async def collect_metrics(self) -> dict:
    """Collect one snapshot of CPU, memory, load, disk, network and process metrics.

    Rate fields (``*_per_sec``, ``*_iops``, ``*_mbps``) are derived from the
    counters and timestamp stored on ``self`` by the previous call, so they
    are 0 on the first call. Before returning, this call stores its own
    counters and timestamp on ``self`` as the baseline for the next one.

    Returns:
        dict: flat mapping of metric name to value. ``timestamp`` is a
        timezone-aware UTC ``datetime``; ``top_cpu_processes`` and
        ``top_mem_processes`` are lists of at most five small dicts.
    """
    # cpu_percent(interval=1) sleeps for the whole interval. Run it on the
    # default executor so the event loop is not stalled for a second.
    loop = asyncio.get_running_loop()
    cpu_percent = await loop.run_in_executor(None, psutil.cpu_percent, 1)

    # Taken after the blocking sample so it matches the moment the counters
    # below are read; everything from here on runs without yielding.
    current_timestamp = time.time()
    elapsed = None
    if self.previous_timestamp is not None:
        elapsed = current_timestamp - self.previous_timestamp

    def rate(current, previous):
        """Return the per-second increase of a cumulative counter, never negative."""
        if elapsed is None or elapsed <= 0:
            return 0
        # A counter that went backwards (reset) would give a negative rate.
        return max(current - previous, 0) / elapsed

    # CPU - detailed
    cpu_count = psutil.cpu_count()
    cpu_per_core = psutil.cpu_percent(interval=0, percpu=True)
    cpu_times = psutil.cpu_times()
    cpu_stats = psutil.cpu_stats()

    # Context switches and interrupts (delta)
    ctx_switches_per_sec = 0
    interrupts_per_sec = 0
    previous_cpu = self.previous_cpu_stats
    if previous_cpu is not None:
        ctx_switches_per_sec = rate(cpu_stats.ctx_switches, previous_cpu.ctx_switches)
        interrupts_per_sec = rate(cpu_stats.interrupts, previous_cpu.interrupts)
    self.previous_cpu_stats = cpu_stats

    # Memory - detailed
    mem = psutil.virtual_memory()
    swap = psutil.swap_memory()

    # Load Average
    load_1, load_5, load_15 = psutil.getloadavg()

    # Disk I/O - with IOPS and throughput
    # disk_io_counters() may return None (e.g. no disks), hence the guards.
    disk_io = psutil.disk_io_counters()
    disk_usage = psutil.disk_usage('/')
    bytes_per_mib = 1024 * 1024

    disk_read_iops = 0
    disk_write_iops = 0
    disk_read_mbps = 0
    disk_write_mbps = 0
    previous_disk = self.previous_disk_io
    if disk_io is not None and previous_disk is not None:
        disk_read_iops = rate(disk_io.read_count, previous_disk.read_count)
        disk_write_iops = rate(disk_io.write_count, previous_disk.write_count)
        # Bytes per second divided by 1024 * 1024, as the "mbps" keys have always been.
        disk_read_mbps = rate(disk_io.read_bytes, previous_disk.read_bytes) / bytes_per_mib
        disk_write_mbps = rate(disk_io.write_bytes, previous_disk.write_bytes) / bytes_per_mib
    self.previous_disk_io = disk_io

    # Network - with packets and throughput
    # net_io_counters() may also return None when there is nothing to report.
    net_io = psutil.net_io_counters()

    net_in_mbps = 0
    net_out_mbps = 0
    net_packets_in_per_sec = 0
    net_packets_out_per_sec = 0
    previous_net = self.previous_net_io
    if net_io is not None and previous_net is not None:
        net_in_mbps = rate(net_io.bytes_recv, previous_net.bytes_recv) / bytes_per_mib
        net_out_mbps = rate(net_io.bytes_sent, previous_net.bytes_sent) / bytes_per_mib
        net_packets_in_per_sec = rate(net_io.packets_recv, previous_net.packets_recv)
        net_packets_out_per_sec = rate(net_io.packets_sent, previous_net.packets_sent)
    self.previous_net_io = net_io
    self.previous_timestamp = current_timestamp

    # Processes - one snapshot feeds the counts and both top-5 lists.
    # process_iter() fills fields it could not read with None instead of raising.
    processes = [
        proc.info
        for proc in psutil.process_iter(
            ['pid', 'name', 'cpu_percent', 'memory_percent', 'num_threads']
        )
    ]

    def cpu_of(info):
        # "or 0" also covers a key that is present but None.
        return info.get('cpu_percent') or 0

    def mem_of(info):
        return info.get('memory_percent') or 0

    top_cpu = sorted(processes, key=cpu_of, reverse=True)[:5]
    top_mem = sorted(processes, key=mem_of, reverse=True)[:5]

    # Clean up process info: keep only entries that actually consume something.
    top_cpu_clean = [
        {'pid': p['pid'], 'name': p['name'], 'cpu': round(cpu_of(p), 1)}
        for p in top_cpu if cpu_of(p) > 0
    ]
    top_mem_clean = [
        {'pid': p['pid'], 'name': p['name'], 'mem': round(mem_of(p), 1)}
        for p in top_mem if mem_of(p) > 0
    ]

    return {
        'timestamp': datetime.now(timezone.utc),
        # CPU
        'cpu_percent': cpu_percent,
        'cpu_count': cpu_count,
        'cpu_per_core': cpu_per_core,
        'cpu_steal': getattr(cpu_times, 'steal', 0),  # VM steal time
        'context_switches_per_sec': int(ctx_switches_per_sec),
        'interrupts_per_sec': int(interrupts_per_sec),
        # Memory
        'memory_total': mem.total,
        'memory_used': mem.used,
        'memory_percent': mem.percent,
        'memory_available': mem.available,
        'memory_buffers': getattr(mem, 'buffers', 0),
        'memory_cached': getattr(mem, 'cached', 0),
        'swap_total': swap.total,
        'swap_used': swap.used,
        'swap_percent': swap.percent,
        # Load
        'load_1': load_1,
        'load_5': load_5,
        'load_15': load_15,
        # Disk I/O (getattr defaults cover disk_io being None)
        'disk_read_bytes': getattr(disk_io, 'read_bytes', 0),
        'disk_write_bytes': getattr(disk_io, 'write_bytes', 0),
        'disk_read_iops': int(disk_read_iops),
        'disk_write_iops': int(disk_write_iops),
        'disk_read_mbps': round(disk_read_mbps, 2),
        'disk_write_mbps': round(disk_write_mbps, 2),
        'disk_io_time_ms': getattr(disk_io, 'read_time', 0) + getattr(disk_io, 'write_time', 0),
        'disk_usage_percent': disk_usage.percent,
        # Network (getattr defaults cover net_io being None)
        'net_sent_bytes': getattr(net_io, 'bytes_sent', 0),
        'net_recv_bytes': getattr(net_io, 'bytes_recv', 0),
        'net_in_mbps': round(net_in_mbps, 2),
        'net_out_mbps': round(net_out_mbps, 2),
        'net_packets_in_per_sec': int(net_packets_in_per_sec),
        'net_packets_out_per_sec': int(net_packets_out_per_sec),
        'net_errors_in': getattr(net_io, 'errin', 0),
        'net_errors_out': getattr(net_io, 'errout', 0),
        'net_drops_in': getattr(net_io, 'dropin', 0),
        'net_drops_out': getattr(net_io, 'dropout', 0),
        # Processes
        'process_count': len(processes),
        'thread_count': sum(p.get('num_threads') or 0 for p in processes),
        'top_cpu_processes': top_cpu_clean,
        'top_mem_processes': top_mem_clean,
    }
|
|
|
|
|
|
|
|
async def store_metrics(self, metrics: dict):
|
|
async def store_metrics(self, metrics: dict):
|