# File: docker_dev/dockmon/backend/realtime.py
"""
Real-time monitoring and WebSocket management for DockMon
Provides live container updates and stats streaming
Note: Docker event monitoring is now handled by the Go service
"""
import asyncio
import json
import logging
from datetime import datetime
from typing import Dict, List, Set, Optional, Any
from dataclasses import dataclass, asdict
import docker
from docker.models.containers import Container as DockerContainer
# Module-level logger named after this module, per stdlib logging convention.
logger = logging.getLogger(__name__)
# Custom JSON encoder for datetime objects
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``datetime`` values as ISO-8601 strings."""

    def default(self, obj):
        # Anything that is not a datetime falls through to the base class,
        # which raises TypeError for unserializable types.
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()
@dataclass
class ContainerStats:
    """Real-time container statistics (used for WebSocket stats streaming)"""
    container_id: str       # short (12-character) container id
    cpu_percent: float      # CPU usage percentage, scaled by the number of CPUs
    memory_mb: float        # current memory usage, in MiB
    memory_percent: float   # memory usage as a percentage of the memory limit
    memory_limit_mb: float  # memory limit, in MiB
    network_rx_mb: float    # network bytes received (sum over interfaces), in MiB
    network_tx_mb: float    # network bytes sent (sum over interfaces), in MiB
    block_read_mb: float    # block-device bytes read, in MiB
    block_write_mb: float   # block-device bytes written, in MiB
    pids: int               # current number of processes in the container
    timestamp: str          # ISO-8601 local timestamp when the sample was taken
class RealtimeMonitor:
    """Manages real-time container monitoring and events.

    Holds WebSocket subscriber registries and one background asyncio task per
    monitored container that polls Docker stats and broadcasts them as JSON.
    """

    def __init__(self):
        # container_id -> set of websockets receiving stats for that container
        self.stats_subscribers: Dict[str, Set[Any]] = {}
        # websockets listening to all Docker events
        self.event_subscribers: Set[Any] = set()
        # container_id -> background task produced by start_container_stats_stream
        self.monitoring_tasks: Dict[str, asyncio.Task] = {}

    async def subscribe_to_stats(self, websocket: Any, container_id: str):
        """Subscribe a websocket to container stats"""
        self.stats_subscribers.setdefault(container_id, set()).add(websocket)
        logger.info(f"WebSocket subscribed to stats for container {container_id}")

    async def unsubscribe_from_stats(self, websocket: Any, container_id: str):
        """Unsubscribe a websocket from container stats"""
        subscribers = self.stats_subscribers.get(container_id)
        if subscribers is None:
            return
        subscribers.discard(websocket)
        if not subscribers:
            del self.stats_subscribers[container_id]
            # Nobody is listening any more: stop the background stats task.
            task = self.monitoring_tasks.pop(container_id, None)
            if task is not None:
                task.cancel()

    async def subscribe_to_events(self, websocket: Any):
        """Subscribe a websocket to all Docker events"""
        self.event_subscribers.add(websocket)
        logger.info("WebSocket subscribed to Docker events")

    async def unsubscribe_from_events(self, websocket: Any):
        """Unsubscribe a websocket from Docker events"""
        self.event_subscribers.discard(websocket)

    async def start_container_stats_stream(self, client: docker.DockerClient,
                                           container_id: str, interval: int = 2):
        """Start streaming stats for a specific container.

        Idempotent: a new task is only created when no *live* task exists.
        (Checking ``done()`` matters — a task that exited, e.g. because the
        container disappeared, must not block a later restart.)
        """
        existing = self.monitoring_tasks.get(container_id)
        if existing is not None and not existing.done():
            return  # Already monitoring
        self.monitoring_tasks[container_id] = asyncio.create_task(
            self._monitor_container_stats(client, container_id, interval)
        )

    async def _monitor_container_stats(self, client: docker.DockerClient,
                                       container_id: str, interval: int):
        """Poll and broadcast container stats while subscribers remain.

        Runs until the subscriber set empties, the container disappears, or
        the task is cancelled; always clears its own monitoring_tasks entry.
        """
        logger.info(f"Starting stats monitoring for container {container_id}")
        try:
            while self.stats_subscribers.get(container_id):
                try:
                    container = client.containers.get(container_id)
                    if container.status != 'running':
                        await asyncio.sleep(interval)
                        continue
                    stats = self._calculate_container_stats(container)
                    # Serialize once; the same payload goes to every subscriber.
                    payload = json.dumps({
                        "type": "container_stats",
                        "data": asdict(stats)
                    }, cls=DateTimeEncoder)
                    dead_sockets = []
                    # Iterate a snapshot: awaiting inside the loop lets other
                    # coroutines mutate the live set, which would raise
                    # "Set changed size during iteration".
                    for websocket in list(self.stats_subscribers.get(container_id, ())):
                        try:
                            await websocket.send_text(payload)
                        except Exception as e:
                            logger.error(f"Error sending stats to websocket: {e}")
                            dead_sockets.append(websocket)
                    # Clean up dead sockets
                    for ws in dead_sockets:
                        await self.unsubscribe_from_stats(ws, container_id)
                except docker.errors.NotFound:
                    logger.warning(f"Container {container_id} not found")
                    break
                except Exception as e:
                    logger.error(f"Error monitoring container {container_id}: {e}")
                await asyncio.sleep(interval)
        finally:
            # Drop the stale task entry so monitoring can be restarted later.
            # Only remove it if it is still *this* task (a restart may already
            # have replaced it).
            if self.monitoring_tasks.get(container_id) is asyncio.current_task():
                self.monitoring_tasks.pop(container_id, None)
            logger.info(f"Stopped stats monitoring for container {container_id}")

    def _calculate_container_stats(self, container: DockerContainer) -> ContainerStats:
        """Calculate container statistics from a one-shot Docker stats sample.

        Returns an all-zero ContainerStats on any parsing failure so the
        stream keeps flowing instead of crashing the monitor task.
        """
        try:
            stats = container.stats(stream=False)
            cpu_stats = stats["cpu_stats"]
            precpu_stats = stats["precpu_stats"]
            # CPU calculation (Docker Engine API formula). system_cpu_usage can
            # be absent from precpu_stats on the very first sample — default to
            # 0 so only cpu_percent degrades, not the whole sample.
            cpu_delta = (cpu_stats["cpu_usage"]["total_usage"]
                         - precpu_stats["cpu_usage"]["total_usage"])
            system_cpu_delta = (cpu_stats.get("system_cpu_usage", 0)
                                - precpu_stats.get("system_cpu_usage", 0))
            # cgroup v2 hosts omit percpu_usage; prefer online_cpus, then fall
            # back to the per-CPU list length, then 1.
            number_cpus = (cpu_stats.get("online_cpus")
                           or len(cpu_stats["cpu_usage"].get("percpu_usage") or ())
                           or 1)
            cpu_percent = 0.0
            if system_cpu_delta > 0.0 and cpu_delta > 0.0:
                cpu_percent = (cpu_delta / system_cpu_delta) * number_cpus * 100.0
            # Memory calculation
            mem_stats = stats.get("memory_stats", {})
            mem_usage = mem_stats.get("usage", 0)
            mem_limit = mem_stats.get("limit", 1)
            mem_percent = (mem_usage / mem_limit) * 100 if mem_limit > 0 else 0
            # Network I/O, summed over all interfaces
            networks = stats.get("networks", {})
            net_rx = sum(net.get("rx_bytes", 0) for net in networks.values())
            net_tx = sum(net.get("tx_bytes", 0) for net in networks.values())
            # Block I/O. The list may be None, and op names are capitalized on
            # cgroup v1 ("Read"/"Write") but lowercase on cgroup v2 — compare
            # case-insensitively so v2 hosts don't silently report 0.
            blkio = stats.get("blkio_stats", {})
            io_read = 0
            io_write = 0
            for item in blkio.get("io_service_bytes_recursive") or ():
                op = item.get("op", "").lower()
                if op == "read":
                    io_read += item.get("value", 0)
                elif op == "write":
                    io_write += item.get("value", 0)
            # Process count
            pids = stats.get("pids_stats", {}).get("current", 0)
            to_mb = 1024 * 1024
            return ContainerStats(
                container_id=container.id[:12],
                cpu_percent=round(cpu_percent, 2),
                memory_mb=round(mem_usage / to_mb, 2),
                memory_percent=round(mem_percent, 2),
                memory_limit_mb=round(mem_limit / to_mb, 2),
                network_rx_mb=round(net_rx / to_mb, 2),
                network_tx_mb=round(net_tx / to_mb, 2),
                block_read_mb=round(io_read / to_mb, 2),
                block_write_mb=round(io_write / to_mb, 2),
                pids=pids,
                timestamp=datetime.now().isoformat()
            )
        except Exception as e:
            logger.error(f"Error calculating stats: {e}")
            return ContainerStats(
                container_id=container.id[:12],
                cpu_percent=0,
                memory_mb=0,
                memory_percent=0,
                memory_limit_mb=0,
                network_rx_mb=0,
                network_tx_mb=0,
                block_read_mb=0,
                block_write_mb=0,
                pids=0,
                timestamp=datetime.now().isoformat()
            )

    def stop_all_monitoring(self):
        """Stop all monitoring tasks"""
        logger.info("Stopping all monitoring tasks")
        # Cancel stats monitoring
        for task in self.monitoring_tasks.values():
            task.cancel()
        self.monitoring_tasks.clear()