Switched from Dockmon to Beszel

2025-10-31 17:13:00 +01:00
parent cc6454cef9
commit f4a4142799
75 changed files with 24313 additions and 122 deletions

@@ -0,0 +1 @@
# Security module for DockMon

@@ -0,0 +1,312 @@
"""
Security Audit Logging System for DockMon
Tracks all security-relevant events for incident response
"""
import json
import logging
import os
import time
from logging.handlers import RotatingFileHandler
from typing import Dict, List, Optional
class SecurityAuditLogger:
"""
Comprehensive security audit logging system
Tracks all security-relevant events for incident response
"""
def __init__(self, event_logger=None):
self.security_logger = logging.getLogger('security_audit')
self.event_logger = event_logger
# Create separate log file for security events in persistent volume
from config.paths import DATA_DIR
log_dir = os.path.join(DATA_DIR, 'logs')
os.makedirs(log_dir, mode=0o700, exist_ok=True)
# Rotating file handler for security audit logs
# Max 10MB per file, keep 14 backups (total max 150MB with current + 14 backups)
security_handler = RotatingFileHandler(
os.path.join(log_dir, 'security_audit.log'),
maxBytes=10*1024*1024, # 10MB
backupCount=14, # Keep 14 old files
encoding='utf-8'
)
security_handler.setLevel(logging.INFO)
# Structured logging format for security events
security_formatter = logging.Formatter(
'%(asctime)s - SECURITY - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S UTC'
)
        # logging.Formatter stamps records in local time by default; switch the
        # converter to UTC so the "UTC" label in datefmt above is accurate
        security_formatter.converter = time.gmtime
        security_handler.setFormatter(security_formatter)
self.security_logger.addHandler(security_handler)
self.security_logger.setLevel(logging.INFO)
self.security_logger.propagate = False # Don't propagate to root logger
def set_event_logger(self, event_logger):
"""Set the event logger instance for logging to Event Viewer"""
self.event_logger = event_logger
def _log_security_event(self, level: str, event_type: str, client_ip: str,
endpoint: str = None, user_agent: str = None,
details: dict = None, risk_level: str = "LOW"):
"""Internal method to log structured security events"""
log_data = {
"event_type": event_type,
"client_ip": client_ip,
"endpoint": endpoint,
"user_agent": user_agent or "unknown",
"risk_level": risk_level,
"details": details or {}
}
# Convert to JSON for structured logging
message = json.dumps(log_data, default=str)
if level.upper() == "ERROR":
self.security_logger.error(message)
elif level.upper() == "WARNING":
self.security_logger.warning(message)
else:
self.security_logger.info(message)
def log_authentication_attempt(self, client_ip: str, success: bool, endpoint: str, user_agent: str = None):
"""Log authentication attempts (both success and failure)"""
event_type = "AUTH_SUCCESS" if success else "AUTH_FAILURE"
risk_level = "LOW" if success else "MEDIUM"
level = "INFO" if success else "WARNING"
self._log_security_event(
level=level,
event_type=event_type,
client_ip=client_ip,
endpoint=endpoint,
user_agent=user_agent,
risk_level=risk_level
)
def log_rate_limit_violation(self, client_ip: str, endpoint: str, violations: int, banned: bool = False):
"""Log rate limiting violations and bans"""
event_type = "RATE_LIMIT_BAN" if banned else "RATE_LIMIT_VIOLATION"
risk_level = "HIGH" if banned else "MEDIUM"
self._log_security_event(
level="ERROR" if banned else "WARNING",
event_type=event_type,
client_ip=client_ip,
endpoint=endpoint,
details={"violation_count": violations, "banned": banned},
risk_level=risk_level
)
def log_input_validation_failure(self, client_ip: str, endpoint: str, field: str,
attempted_value: str, user_agent: str = None):
"""Log input validation failures (potential attacks)"""
self._log_security_event(
level="WARNING",
event_type="INPUT_VALIDATION_FAILURE",
client_ip=client_ip,
endpoint=endpoint,
user_agent=user_agent,
details={
"field": field,
"attempted_value": attempted_value[:100] if attempted_value else None, # Limit log size
"attack_indicators": self._detect_attack_patterns(attempted_value)
},
risk_level="HIGH"
)
def log_cors_violation(self, client_ip: str, origin: str, endpoint: str):
"""Log CORS policy violations"""
self._log_security_event(
level="WARNING",
event_type="CORS_VIOLATION",
client_ip=client_ip,
endpoint=endpoint,
details={"blocked_origin": origin},
risk_level="MEDIUM"
)
def log_privileged_action(self, client_ip: str, action: str, target: str, success: bool, user_agent: str = None):
"""Log privileged actions (host management, container control, etc.)"""
event_type = f"PRIVILEGED_ACTION_{action.upper()}"
risk_level = "MEDIUM" if success else "HIGH"
self._log_security_event(
level="INFO" if success else "ERROR",
event_type=event_type,
client_ip=client_ip,
user_agent=user_agent,
details={
"action": action,
"target": target,
"success": success
},
risk_level=risk_level
)
def log_suspicious_activity(self, client_ip: str, activity_type: str, details: dict, endpoint: str = None):
"""Log suspicious activities that don't fit other categories"""
self._log_security_event(
level="ERROR",
event_type="SUSPICIOUS_ACTIVITY",
client_ip=client_ip,
endpoint=endpoint,
details={
"activity_type": activity_type,
**details
},
risk_level="HIGH"
)
def _detect_attack_patterns(self, value: str) -> list:
"""Detect common attack patterns in input"""
if not value:
return []
patterns = []
value_lower = value.lower()
# XSS patterns
if any(pattern in value_lower for pattern in ['<script', 'javascript:', 'onerror=', 'onload=']):
patterns.append("XSS")
# SQL injection patterns
if any(pattern in value_lower for pattern in [' or ', ' union ', ' select ', "'; drop", '1=1']):
patterns.append("SQL_INJECTION")
# Command injection patterns
if any(pattern in value_lower for pattern in ['; rm ', '| cat ', '&& curl', 'wget ', '`whoami`']):
patterns.append("COMMAND_INJECTION")
# Path traversal patterns
if any(pattern in value for pattern in ['../../../', '..\\..\\', '/etc/passwd', 'c:\\windows']):
patterns.append("PATH_TRAVERSAL")
# SSRF patterns
if any(pattern in value_lower for pattern in ['localhost', '127.0.0.1', '169.254.169.254']):
patterns.append("SSRF")
return patterns
def get_security_stats(self, hours: int = 24) -> dict:
"""Get security statistics for the last N hours"""
# This is a simplified version - in production you'd query actual log files
return {
"timeframe_hours": hours,
"total_security_events": "N/A - check logs/security_audit.log",
"log_location": "logs/security_audit.log",
"note": "Parse JSON logs for detailed statistics"
}
def log_login_success(self, client_ip: str, user_agent: str, session_id: str):
"""Log successful login attempt"""
self._log_security_event(
level="INFO",
event_type="LOGIN_SUCCESS",
client_ip=client_ip,
user_agent=user_agent,
details={"session_id": session_id[:8] + "..."}, # Don't log full session ID
risk_level="LOW"
)
def log_login_failure(self, client_ip: str, user_agent: str, reason: str):
"""Log failed login attempt"""
self._log_security_event(
level="WARNING",
event_type="LOGIN_FAILURE",
client_ip=client_ip,
user_agent=user_agent,
details={"reason": reason},
risk_level="MEDIUM"
)
def log_session_expired(self, client_ip: str, session_id: str):
"""Log session expiration"""
self._log_security_event(
level="INFO",
event_type="SESSION_EXPIRED",
client_ip=client_ip,
details={"session_id": session_id[:8] + "..."},
risk_level="LOW"
)
def log_session_hijack_attempt(self, original_ip: str, attempted_ip: str, session_id: str):
"""Log potential session hijacking attempt"""
self._log_security_event(
level="ERROR",
event_type="SESSION_HIJACK_ATTEMPT",
client_ip=attempted_ip,
details={
"original_ip": original_ip,
"attempted_ip": attempted_ip,
"session_id": session_id[:8] + "..."
},
risk_level="HIGH"
)
def log_authentication_failure(self, client_ip: str, user_agent: str, reason: str):
"""Log authentication failure"""
self._log_security_event(
level="WARNING",
event_type="AUTH_FAILURE",
client_ip=client_ip,
user_agent=user_agent,
details={"reason": reason},
risk_level="MEDIUM"
)
def log_password_change(self, client_ip: str, user_agent: str, username: str):
"""Log password change event"""
self._log_security_event(
level="INFO",
event_type="PASSWORD_CHANGE",
client_ip=client_ip,
user_agent=user_agent,
details={"username": username, "message": f"Password changed for user: {username}"},
risk_level="LOW"
)
# Also log to event logger for Event Viewer
if self.event_logger:
from event_logger import EventCategory, EventType, EventSeverity
self.event_logger.log_event(
category=EventCategory.USER,
event_type=EventType.CONFIG_CHANGED,
title="Password Changed",
message=f"User '{username}' changed their password from IP: {client_ip}",
severity=EventSeverity.INFO,
details={"username": username, "client_ip": client_ip, "user_agent": user_agent}
)
def log_username_change(self, client_ip: str, user_agent: str, old_username: str, new_username: str):
"""Log username change event"""
self._log_security_event(
level="INFO",
event_type="USERNAME_CHANGE",
client_ip=client_ip,
user_agent=user_agent,
details={
"old_username": old_username,
"new_username": new_username,
"message": f"Username changed from {old_username} to {new_username}"
},
risk_level="LOW"
)
# Also log to event logger for Event Viewer
if self.event_logger:
from event_logger import EventCategory, EventType, EventSeverity
self.event_logger.log_event(
category=EventCategory.USER,
event_type=EventType.CONFIG_CHANGED,
title="Username Changed",
message=f"Username changed from '{old_username}' to '{new_username}' from IP: {client_ip}",
severity=EventSeverity.INFO,
details={"old_username": old_username, "new_username": new_username, "client_ip": client_ip, "user_agent": user_agent}
)
# Global security audit logger instance
security_audit = SecurityAuditLogger()
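
To show how the module-level instance is intended to be consumed elsewhere in the backend, here is a minimal, hypothetical sketch of a FastAPI login handler calling into security_audit. The route path, request model, and verify_credentials helper are assumptions for illustration only and are not part of this commit; the import path follows the relative import used by the rate limiter below.

from fastapi import FastAPI, Request
from pydantic import BaseModel

from security.audit import security_audit

app = FastAPI()

class LoginRequest(BaseModel):
    username: str
    password: str

def verify_credentials(username: str, password: str) -> bool:
    """Hypothetical credential check, stubbed out for this sketch."""
    return False

@app.post("/api/auth/login")
async def login(body: LoginRequest, request: Request):
    client_ip = request.client.host if request.client else "unknown"
    user_agent = request.headers.get("user-agent")
    success = verify_credentials(body.username, body.password)
    # Every attempt, successful or not, is appended as one JSON line to
    # logs/security_audit.log by the rotating handler configured above
    security_audit.log_authentication_attempt(
        client_ip=client_ip,
        success=success,
        endpoint="/api/auth/login",
        user_agent=user_agent,
    )
    return {"ok": success}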

@@ -0,0 +1,185 @@
"""
Rate Limiting System for DockMon
Provides protection against abuse and DoS attacks using token bucket algorithm
"""
import logging
import os
import time
from collections import defaultdict
from datetime import datetime
from typing import Dict, Tuple
from fastapi import Request, HTTPException, status, Depends
from .audit import security_audit
logger = logging.getLogger(__name__)
class RateLimiter:
"""
In-memory rate limiter using token bucket algorithm
Provides protection against abuse and DoS attacks
"""
def __init__(self):
# Start with generous initial tokens to allow immediate bursts
# This prevents legitimate users from getting rate limited on first use
self.clients = defaultdict(lambda: {"tokens": 100, "last_update": time.time(), "violations": 0})
# Get rate limits from environment or use production-friendly defaults
self.limits = {
# endpoint_pattern: (requests_per_minute, burst_limit, violation_threshold)
"default": (
int(os.getenv('DOCKMON_RATE_LIMIT_DEFAULT', 120)),
int(os.getenv('DOCKMON_RATE_BURST_DEFAULT', 20)),
int(os.getenv('DOCKMON_RATE_VIOLATIONS_DEFAULT', 8))
),
"auth": (
int(os.getenv('DOCKMON_RATE_LIMIT_AUTH', 10)), # 10 per minute for auth
int(os.getenv('DOCKMON_RATE_BURST_AUTH', 5)), # Lower burst
int(os.getenv('DOCKMON_RATE_VIOLATIONS_AUTH', 10)) # More lenient violations
),
"hosts": (
int(os.getenv('DOCKMON_RATE_LIMIT_HOSTS', 60)),
int(os.getenv('DOCKMON_RATE_BURST_HOSTS', 15)),
int(os.getenv('DOCKMON_RATE_VIOLATIONS_HOSTS', 8))
),
"containers": (
int(os.getenv('DOCKMON_RATE_LIMIT_CONTAINERS', 900)), # Increased for logs polling with multiple containers
int(os.getenv('DOCKMON_RATE_BURST_CONTAINERS', 180)),
int(os.getenv('DOCKMON_RATE_VIOLATIONS_CONTAINERS', 25))
),
"notifications": (
int(os.getenv('DOCKMON_RATE_LIMIT_NOTIFICATIONS', 30)),
int(os.getenv('DOCKMON_RATE_BURST_NOTIFICATIONS', 10)),
int(os.getenv('DOCKMON_RATE_VIOLATIONS_NOTIFICATIONS', 5))
),
}
logger.info(f"Rate limiting configured: Default={self.limits['default'][0]}/min, "
f"Auth={self.limits['auth'][0]}/min, Containers={self.limits['containers'][0]}/min")
self.banned_clients = {} # IP -> ban_until_timestamp
def _get_limit(self, endpoint: str) -> tuple:
"""Get rate limit for specific endpoint"""
for pattern, limits in self.limits.items():
if pattern in endpoint.lower():
return limits
return self.limits["default"]
def _cleanup_old_entries(self):
"""Clean up old entries to prevent memory leaks"""
current_time = time.time()
# Remove clients not seen for 1 hour
cutoff_time = current_time - 3600
old_clients = [ip for ip, data in self.clients.items()
if data["last_update"] < cutoff_time]
for ip in old_clients:
del self.clients[ip]
# Remove expired bans
expired_bans = [ip for ip, ban_time in self.banned_clients.items()
if current_time > ban_time]
for ip in expired_bans:
del self.banned_clients[ip]
def is_allowed(self, client_ip: str, endpoint: str) -> Tuple[bool, str]:
"""Check if request is allowed and return (allowed, reason)"""
current_time = time.time()
        # Opportunistic cleanup: this only fires when a request happens to
        # arrive within the first second of a 5-minute window, so it is
        # best-effort rather than a guaranteed periodic job
        if current_time % 300 < 1:
self._cleanup_old_entries()
# Check if client is banned
if client_ip in self.banned_clients:
if current_time < self.banned_clients[client_ip]:
return False, f"IP banned until {datetime.fromtimestamp(self.banned_clients[client_ip]).isoformat()}"
else:
# Ban expired, remove from banned list
del self.banned_clients[client_ip]
requests_per_minute, burst_limit, violation_threshold = self._get_limit(endpoint)
client_data = self.clients[client_ip]
# Token bucket algorithm with burst support
time_passed = current_time - client_data["last_update"]
tokens_to_add = (time_passed / 60.0) * requests_per_minute
# Allow bursting up to burst_limit tokens
client_data["tokens"] = min(burst_limit, client_data["tokens"] + tokens_to_add)
client_data["last_update"] = current_time
# Check if request is allowed
if client_data["tokens"] >= 1.0:
client_data["tokens"] -= 1.0
return True, "OK"
else:
# Rate limit exceeded
client_data["violations"] += 1
# Check if violations exceed threshold - ban the client
if client_data["violations"] >= violation_threshold:
ban_duration = 60 # 60 seconds ban (reduced from 15 minutes for better UX)
self.banned_clients[client_ip] = current_time + ban_duration
logger.warning(f"IP {client_ip} banned for 60 seconds due to {violation_threshold} rate limit violations")
# Security audit log
security_audit.log_rate_limit_violation(
client_ip=client_ip,
endpoint=endpoint,
violations=client_data["violations"],
banned=True
)
return False, f"IP banned for repeated violations"
            # Log rate limit violation
            security_audit.log_rate_limit_violation(
                client_ip=client_ip,
                endpoint=endpoint,
                violations=client_data["violations"],
                banned=False
            )
            # Estimate how long until a single token is refilled at the
            # configured per-minute rate, rather than always claiming 60 seconds
            retry_seconds = max(1, int((1.0 - client_data["tokens"]) * 60.0 / requests_per_minute))
            return False, f"Rate limit exceeded. Try again in {retry_seconds} seconds"
def get_stats(self) -> dict:
"""Get rate limiter statistics"""
return {
"active_clients": len(self.clients),
"banned_clients": len(self.banned_clients),
"total_violations": sum(data["violations"] for data in self.clients.values())
}
# Global rate limiter instance
rate_limiter = RateLimiter()
def get_rate_limit_dependency(endpoint_type: str = "default"):
"""Create a dependency for rate limiting specific endpoint types"""
def rate_limit_check(request: Request):
        client_ip = request.client.host if request.client else "unknown"
endpoint_name = f"{endpoint_type}_{request.url.path}"
allowed, reason = rate_limiter.is_allowed(client_ip, endpoint_name)
if not allowed:
logger.warning(f"Rate limit exceeded for {client_ip} on {endpoint_name}: {reason}")
raise HTTPException(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
detail=f"Rate limit exceeded: {reason}",
headers={"Retry-After": "60"}
)
return True
return rate_limit_check
# Rate limiting dependencies for different endpoint types
rate_limit_auth = Depends(get_rate_limit_dependency("auth"))
rate_limit_hosts = Depends(get_rate_limit_dependency("hosts"))
rate_limit_containers = Depends(get_rate_limit_dependency("containers"))
rate_limit_notifications = Depends(get_rate_limit_dependency("notifications"))
rate_limit_default = Depends(get_rate_limit_dependency("default"))
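
The Depends objects above are meant to be attached per route so each endpoint family draws from its own bucket. A minimal sketch of that wiring, assuming a FastAPI app and route paths that are not shown in this diff (the module path security.rate_limiting is likewise an assumption, since the diff omits file names):

from fastapi import FastAPI

from security.rate_limiting import rate_limit_auth, rate_limit_containers

app = FastAPI()

# Route-level dependencies run before the handler; when a client's token
# bucket is empty, the dependency raises HTTP 429 with a Retry-After header
@app.post("/api/auth/login", dependencies=[rate_limit_auth])
async def login():
    return {"status": "ok"}

@app.get("/api/containers", dependencies=[rate_limit_containers])
async def list_containers():
    return []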