Switched from Dockmon to Beszel

2025-10-31 17:13:00 +01:00
parent cc6454cef9
commit f4a4142799
75 changed files with 24313 additions and 122 deletions

@@ -0,0 +1 @@
# Security module for DockMon

@@ -0,0 +1,312 @@
"""
Security Audit Logging System for DockMon
Tracks all security-relevant events for incident response
"""
import json
import logging
import os
import time
from logging.handlers import RotatingFileHandler
from typing import Dict, List, Optional
class SecurityAuditLogger:
"""
Comprehensive security audit logging system
Tracks all security-relevant events for incident response
"""
def __init__(self, event_logger=None):
self.security_logger = logging.getLogger('security_audit')
self.event_logger = event_logger
# Create separate log file for security events in persistent volume
from config.paths import DATA_DIR
log_dir = os.path.join(DATA_DIR, 'logs')
os.makedirs(log_dir, mode=0o700, exist_ok=True)
# Rotating file handler for security audit logs
# Max 10MB per file, keep 14 backups (total max 150MB with current + 14 backups)
security_handler = RotatingFileHandler(
os.path.join(log_dir, 'security_audit.log'),
maxBytes=10*1024*1024, # 10MB
backupCount=14, # Keep 14 old files
encoding='utf-8'
)
security_handler.setLevel(logging.INFO)
# Structured logging format for security events
security_formatter = logging.Formatter(
'%(asctime)s - SECURITY - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S UTC'
)
        # logging.Formatter stamps records in local time by default; switch the
        # converter to UTC so the "UTC" label in datefmt above is accurate
        security_formatter.converter = time.gmtime
        security_handler.setFormatter(security_formatter)
self.security_logger.addHandler(security_handler)
self.security_logger.setLevel(logging.INFO)
self.security_logger.propagate = False # Don't propagate to root logger
def set_event_logger(self, event_logger):
"""Set the event logger instance for logging to Event Viewer"""
self.event_logger = event_logger
def _log_security_event(self, level: str, event_type: str, client_ip: str,
endpoint: str = None, user_agent: str = None,
details: dict = None, risk_level: str = "LOW"):
"""Internal method to log structured security events"""
log_data = {
"event_type": event_type,
"client_ip": client_ip,
"endpoint": endpoint,
"user_agent": user_agent or "unknown",
"risk_level": risk_level,
"details": details or {}
}
# Convert to JSON for structured logging
message = json.dumps(log_data, default=str)
if level.upper() == "ERROR":
self.security_logger.error(message)
elif level.upper() == "WARNING":
self.security_logger.warning(message)
else:
self.security_logger.info(message)
def log_authentication_attempt(self, client_ip: str, success: bool, endpoint: str, user_agent: str = None):
"""Log authentication attempts (both success and failure)"""
event_type = "AUTH_SUCCESS" if success else "AUTH_FAILURE"
risk_level = "LOW" if success else "MEDIUM"
level = "INFO" if success else "WARNING"
self._log_security_event(
level=level,
event_type=event_type,
client_ip=client_ip,
endpoint=endpoint,
user_agent=user_agent,
risk_level=risk_level
)
def log_rate_limit_violation(self, client_ip: str, endpoint: str, violations: int, banned: bool = False):
"""Log rate limiting violations and bans"""
event_type = "RATE_LIMIT_BAN" if banned else "RATE_LIMIT_VIOLATION"
risk_level = "HIGH" if banned else "MEDIUM"
self._log_security_event(
level="ERROR" if banned else "WARNING",
event_type=event_type,
client_ip=client_ip,
endpoint=endpoint,
details={"violation_count": violations, "banned": banned},
risk_level=risk_level
)
def log_input_validation_failure(self, client_ip: str, endpoint: str, field: str,
attempted_value: str, user_agent: str = None):
"""Log input validation failures (potential attacks)"""
self._log_security_event(
level="WARNING",
event_type="INPUT_VALIDATION_FAILURE",
client_ip=client_ip,
endpoint=endpoint,
user_agent=user_agent,
details={
"field": field,
"attempted_value": attempted_value[:100] if attempted_value else None, # Limit log size
"attack_indicators": self._detect_attack_patterns(attempted_value)
},
risk_level="HIGH"
)
def log_cors_violation(self, client_ip: str, origin: str, endpoint: str):
"""Log CORS policy violations"""
self._log_security_event(
level="WARNING",
event_type="CORS_VIOLATION",
client_ip=client_ip,
endpoint=endpoint,
details={"blocked_origin": origin},
risk_level="MEDIUM"
)
def log_privileged_action(self, client_ip: str, action: str, target: str, success: bool, user_agent: str = None):
"""Log privileged actions (host management, container control, etc.)"""
event_type = f"PRIVILEGED_ACTION_{action.upper()}"
risk_level = "MEDIUM" if success else "HIGH"
self._log_security_event(
level="INFO" if success else "ERROR",
event_type=event_type,
client_ip=client_ip,
user_agent=user_agent,
details={
"action": action,
"target": target,
"success": success
},
risk_level=risk_level
)
def log_suspicious_activity(self, client_ip: str, activity_type: str, details: dict, endpoint: str = None):
"""Log suspicious activities that don't fit other categories"""
self._log_security_event(
level="ERROR",
event_type="SUSPICIOUS_ACTIVITY",
client_ip=client_ip,
endpoint=endpoint,
details={
"activity_type": activity_type,
**details
},
risk_level="HIGH"
)
def _detect_attack_patterns(self, value: str) -> list:
"""Detect common attack patterns in input"""
if not value:
return []
patterns = []
value_lower = value.lower()
# XSS patterns
if any(pattern in value_lower for pattern in ['<script', 'javascript:', 'onerror=', 'onload=']):
patterns.append("XSS")
# SQL injection patterns
if any(pattern in value_lower for pattern in [' or ', ' union ', ' select ', "'; drop", '1=1']):
patterns.append("SQL_INJECTION")
# Command injection patterns
if any(pattern in value_lower for pattern in ['; rm ', '| cat ', '&& curl', 'wget ', '`whoami`']):
patterns.append("COMMAND_INJECTION")
# Path traversal patterns
if any(pattern in value for pattern in ['../../../', '..\\..\\', '/etc/passwd', 'c:\\windows']):
patterns.append("PATH_TRAVERSAL")
# SSRF patterns
if any(pattern in value_lower for pattern in ['localhost', '127.0.0.1', '169.254.169.254']):
patterns.append("SSRF")
return patterns
def get_security_stats(self, hours: int = 24) -> dict:
"""Get security statistics for the last N hours"""
# This is a simplified version - in production you'd query actual log files
return {
"timeframe_hours": hours,
"total_security_events": "N/A - check logs/security_audit.log",
"log_location": "logs/security_audit.log",
"note": "Parse JSON logs for detailed statistics"
}
def log_login_success(self, client_ip: str, user_agent: str, session_id: str):
"""Log successful login attempt"""
self._log_security_event(
level="INFO",
event_type="LOGIN_SUCCESS",
client_ip=client_ip,
user_agent=user_agent,
details={"session_id": session_id[:8] + "..."}, # Don't log full session ID
risk_level="LOW"
)
def log_login_failure(self, client_ip: str, user_agent: str, reason: str):
"""Log failed login attempt"""
self._log_security_event(
level="WARNING",
event_type="LOGIN_FAILURE",
client_ip=client_ip,
user_agent=user_agent,
details={"reason": reason},
risk_level="MEDIUM"
)
def log_session_expired(self, client_ip: str, session_id: str):
"""Log session expiration"""
self._log_security_event(
level="INFO",
event_type="SESSION_EXPIRED",
client_ip=client_ip,
details={"session_id": session_id[:8] + "..."},
risk_level="LOW"
)
def log_session_hijack_attempt(self, original_ip: str, attempted_ip: str, session_id: str):
"""Log potential session hijacking attempt"""
self._log_security_event(
level="ERROR",
event_type="SESSION_HIJACK_ATTEMPT",
client_ip=attempted_ip,
details={
"original_ip": original_ip,
"attempted_ip": attempted_ip,
"session_id": session_id[:8] + "..."
},
risk_level="HIGH"
)
def log_authentication_failure(self, client_ip: str, user_agent: str, reason: str):
"""Log authentication failure"""
self._log_security_event(
level="WARNING",
event_type="AUTH_FAILURE",
client_ip=client_ip,
user_agent=user_agent,
details={"reason": reason},
risk_level="MEDIUM"
)
def log_password_change(self, client_ip: str, user_agent: str, username: str):
"""Log password change event"""
self._log_security_event(
level="INFO",
event_type="PASSWORD_CHANGE",
client_ip=client_ip,
user_agent=user_agent,
details={"username": username, "message": f"Password changed for user: {username}"},
risk_level="LOW"
)
# Also log to event logger for Event Viewer
if self.event_logger:
from event_logger import EventCategory, EventType, EventSeverity
self.event_logger.log_event(
category=EventCategory.USER,
event_type=EventType.CONFIG_CHANGED,
title="Password Changed",
message=f"User '{username}' changed their password from IP: {client_ip}",
severity=EventSeverity.INFO,
details={"username": username, "client_ip": client_ip, "user_agent": user_agent}
)
def log_username_change(self, client_ip: str, user_agent: str, old_username: str, new_username: str):
"""Log username change event"""
self._log_security_event(
level="INFO",
event_type="USERNAME_CHANGE",
client_ip=client_ip,
user_agent=user_agent,
details={
"old_username": old_username,
"new_username": new_username,
"message": f"Username changed from {old_username} to {new_username}"
},
risk_level="LOW"
)
# Also log to event logger for Event Viewer
if self.event_logger:
from event_logger import EventCategory, EventType, EventSeverity
self.event_logger.log_event(
category=EventCategory.USER,
event_type=EventType.CONFIG_CHANGED,
title="Username Changed",
message=f"Username changed from '{old_username}' to '{new_username}' from IP: {client_ip}",
severity=EventSeverity.INFO,
details={"old_username": old_username, "new_username": new_username, "client_ip": client_ip, "user_agent": user_agent}
)
# Global security audit logger instance
security_audit = SecurityAuditLogger()
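
To show how the module-level instance is intended to be consumed elsewhere in the backend, here is a minimal, hypothetical sketch of a FastAPI login handler calling into security_audit. The route path, request model, and verify_credentials helper are assumptions for illustration only and are not part of this commit; the import path follows the relative import used by the rate limiter below.

from fastapi import FastAPI, Request
from pydantic import BaseModel

from security.audit import security_audit

app = FastAPI()

class LoginRequest(BaseModel):
    username: str
    password: str

def verify_credentials(username: str, password: str) -> bool:
    """Hypothetical credential check, stubbed out for this sketch."""
    return False

@app.post("/api/auth/login")
async def login(body: LoginRequest, request: Request):
    client_ip = request.client.host if request.client else "unknown"
    user_agent = request.headers.get("user-agent")
    success = verify_credentials(body.username, body.password)
    # Every attempt, successful or not, is appended as one JSON line to
    # logs/security_audit.log by the rotating handler configured above
    security_audit.log_authentication_attempt(
        client_ip=client_ip,
        success=success,
        endpoint="/api/auth/login",
        user_agent=user_agent,
    )
    return {"ok": success}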

@@ -0,0 +1,185 @@
"""
Rate Limiting System for DockMon
Provides protection against abuse and DoS attacks using token bucket algorithm
"""
import logging
import os
import time
from collections import defaultdict
from datetime import datetime
from typing import Dict, Tuple
from fastapi import Request, HTTPException, status, Depends
from .audit import security_audit
logger = logging.getLogger(__name__)
class RateLimiter:
"""
In-memory rate limiter using token bucket algorithm
Provides protection against abuse and DoS attacks
"""
def __init__(self):
# Start with generous initial tokens to allow immediate bursts
# This prevents legitimate users from getting rate limited on first use
self.clients = defaultdict(lambda: {"tokens": 100, "last_update": time.time(), "violations": 0})
# Get rate limits from environment or use production-friendly defaults
self.limits = {
# endpoint_pattern: (requests_per_minute, burst_limit, violation_threshold)
"default": (
int(os.getenv('DOCKMON_RATE_LIMIT_DEFAULT', 120)),
int(os.getenv('DOCKMON_RATE_BURST_DEFAULT', 20)),
int(os.getenv('DOCKMON_RATE_VIOLATIONS_DEFAULT', 8))
),
"auth": (
int(os.getenv('DOCKMON_RATE_LIMIT_AUTH', 10)), # 10 per minute for auth
int(os.getenv('DOCKMON_RATE_BURST_AUTH', 5)), # Lower burst
int(os.getenv('DOCKMON_RATE_VIOLATIONS_AUTH', 10)) # More lenient violations
),
"hosts": (
int(os.getenv('DOCKMON_RATE_LIMIT_HOSTS', 60)),
int(os.getenv('DOCKMON_RATE_BURST_HOSTS', 15)),
int(os.getenv('DOCKMON_RATE_VIOLATIONS_HOSTS', 8))
),
"containers": (
int(os.getenv('DOCKMON_RATE_LIMIT_CONTAINERS', 900)), # Increased for logs polling with multiple containers
int(os.getenv('DOCKMON_RATE_BURST_CONTAINERS', 180)),
int(os.getenv('DOCKMON_RATE_VIOLATIONS_CONTAINERS', 25))
),
"notifications": (
int(os.getenv('DOCKMON_RATE_LIMIT_NOTIFICATIONS', 30)),
int(os.getenv('DOCKMON_RATE_BURST_NOTIFICATIONS', 10)),
int(os.getenv('DOCKMON_RATE_VIOLATIONS_NOTIFICATIONS', 5))
),
}
logger.info(f"Rate limiting configured: Default={self.limits['default'][0]}/min, "
f"Auth={self.limits['auth'][0]}/min, Containers={self.limits['containers'][0]}/min")
self.banned_clients = {} # IP -> ban_until_timestamp
def _get_limit(self, endpoint: str) -> tuple:
"""Get rate limit for specific endpoint"""
for pattern, limits in self.limits.items():
if pattern in endpoint.lower():
return limits
return self.limits["default"]
def _cleanup_old_entries(self):
"""Clean up old entries to prevent memory leaks"""
current_time = time.time()
# Remove clients not seen for 1 hour
cutoff_time = current_time - 3600
old_clients = [ip for ip, data in self.clients.items()
if data["last_update"] < cutoff_time]
for ip in old_clients:
del self.clients[ip]
# Remove expired bans
expired_bans = [ip for ip, ban_time in self.banned_clients.items()
if current_time > ban_time]
for ip in expired_bans:
del self.banned_clients[ip]
def is_allowed(self, client_ip: str, endpoint: str) -> Tuple[bool, str]:
"""Check if request is allowed and return (allowed, reason)"""
current_time = time.time()
        # Opportunistic cleanup: this only fires when a request happens to
        # arrive within the first second of a 5-minute window, so it is
        # best-effort rather than a guaranteed periodic job
        if current_time % 300 < 1:
self._cleanup_old_entries()
# Check if client is banned
if client_ip in self.banned_clients:
if current_time < self.banned_clients[client_ip]:
return False, f"IP banned until {datetime.fromtimestamp(self.banned_clients[client_ip]).isoformat()}"
else:
# Ban expired, remove from banned list
del self.banned_clients[client_ip]
requests_per_minute, burst_limit, violation_threshold = self._get_limit(endpoint)
client_data = self.clients[client_ip]
# Token bucket algorithm with burst support
time_passed = current_time - client_data["last_update"]
tokens_to_add = (time_passed / 60.0) * requests_per_minute
# Allow bursting up to burst_limit tokens
client_data["tokens"] = min(burst_limit, client_data["tokens"] + tokens_to_add)
client_data["last_update"] = current_time
# Check if request is allowed
if client_data["tokens"] >= 1.0:
client_data["tokens"] -= 1.0
return True, "OK"
else:
# Rate limit exceeded
client_data["violations"] += 1
# Check if violations exceed threshold - ban the client
if client_data["violations"] >= violation_threshold:
ban_duration = 60 # 60 seconds ban (reduced from 15 minutes for better UX)
self.banned_clients[client_ip] = current_time + ban_duration
logger.warning(f"IP {client_ip} banned for 60 seconds due to {violation_threshold} rate limit violations")
# Security audit log
security_audit.log_rate_limit_violation(
client_ip=client_ip,
endpoint=endpoint,
violations=client_data["violations"],
banned=True
)
return False, f"IP banned for repeated violations"
            # Log rate limit violation
            security_audit.log_rate_limit_violation(
                client_ip=client_ip,
                endpoint=endpoint,
                violations=client_data["violations"],
                banned=False
            )
            # Estimate how long until a single token is refilled at the
            # configured per-minute rate, rather than always claiming 60 seconds
            retry_seconds = max(1, int((1.0 - client_data["tokens"]) * 60.0 / requests_per_minute))
            return False, f"Rate limit exceeded. Try again in {retry_seconds} seconds"
def get_stats(self) -> dict:
"""Get rate limiter statistics"""
return {
"active_clients": len(self.clients),
"banned_clients": len(self.banned_clients),
"total_violations": sum(data["violations"] for data in self.clients.values())
}
# Global rate limiter instance
rate_limiter = RateLimiter()
def get_rate_limit_dependency(endpoint_type: str = "default"):
"""Create a dependency for rate limiting specific endpoint types"""
def rate_limit_check(request: Request):
        client_ip = request.client.host if request.client else "unknown"
endpoint_name = f"{endpoint_type}_{request.url.path}"
allowed, reason = rate_limiter.is_allowed(client_ip, endpoint_name)
if not allowed:
logger.warning(f"Rate limit exceeded for {client_ip} on {endpoint_name}: {reason}")
raise HTTPException(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
detail=f"Rate limit exceeded: {reason}",
headers={"Retry-After": "60"}
)
return True
return rate_limit_check
# Rate limiting dependencies for different endpoint types
rate_limit_auth = Depends(get_rate_limit_dependency("auth"))
rate_limit_hosts = Depends(get_rate_limit_dependency("hosts"))
rate_limit_containers = Depends(get_rate_limit_dependency("containers"))
rate_limit_notifications = Depends(get_rate_limit_dependency("notifications"))
rate_limit_default = Depends(get_rate_limit_dependency("default"))
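
The Depends objects above are meant to be attached per route so each endpoint family draws from its own bucket. A minimal sketch of that wiring, assuming a FastAPI app and route paths that are not shown in this diff (the module path security.rate_limiting is likewise an assumption, since the diff omits file names):

from fastapi import FastAPI

from security.rate_limiting import rate_limit_auth, rate_limit_containers

app = FastAPI()

# Route-level dependencies run before the handler; when a client's token
# bucket is empty, the dependency raises HTTP 429 with a Retry-After header
@app.post("/api/auth/login", dependencies=[rate_limit_auth])
async def login():
    return {"status": "ok"}

@app.get("/api/containers", dependencies=[rate_limit_containers])
async def list_containers():
    return []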