From 97133df5cec7250ea823202452fee3f8bbea8c56 Mon Sep 17 00:00:00 2001 From: julien Date: Fri, 14 Nov 2025 00:47:26 +0100 Subject: [PATCH] Monitoring --- komodo/.gitignore | 13 -- komodo/compose.env | 138 ------------------ komodo/mongo.compose.yaml | 84 ----------- komodo/periphery.compose.yaml | 43 ------ komodo/postgres.compose.yaml | 97 ------------ komodo/sqlite.compose.yaml | 77 ---------- monitoring/alertmanager/config.yml | 18 +++ monitoring/compose.env | 1 + monitoring/docker-compose-node.yaml | 96 ++++++++++++ monitoring/docker-compose.yaml | 96 ++++++++++++ .../grafana/dashboards/system-overview.json | 37 +++++ .../provisioning/dashboards/dashboards.yml | 12 ++ .../provisioning/datasources/datasource.yml | 8 + monitoring/prometheus/prometheus.yml | 34 +++++ .../prometheus/rules/container_alerts.yml | 29 ++++ monitoring/prometheus/rules/node_alerts.yml | 38 +++++ .../prometheus/rules/recording_rules.yml | 12 ++ odoo/docker-compose.yaml | 45 ++++++ 18 files changed, 426 insertions(+), 452 deletions(-) delete mode 100644 komodo/.gitignore delete mode 100644 komodo/compose.env delete mode 100644 komodo/mongo.compose.yaml delete mode 100644 komodo/periphery.compose.yaml delete mode 100644 komodo/postgres.compose.yaml delete mode 100644 komodo/sqlite.compose.yaml create mode 100644 monitoring/alertmanager/config.yml create mode 100644 monitoring/compose.env create mode 100644 monitoring/docker-compose-node.yaml create mode 100644 monitoring/docker-compose.yaml create mode 100644 monitoring/grafana/dashboards/system-overview.json create mode 100644 monitoring/grafana/provisioning/dashboards/dashboards.yml create mode 100644 monitoring/grafana/provisioning/datasources/datasource.yml create mode 100644 monitoring/prometheus/prometheus.yml create mode 100644 monitoring/prometheus/rules/container_alerts.yml create mode 100644 monitoring/prometheus/rules/node_alerts.yml create mode 100644 monitoring/prometheus/rules/recording_rules.yml create mode 100644 
odoo/docker-compose.yaml diff --git a/komodo/.gitignore b/komodo/.gitignore deleted file mode 100644 index d49f1f2..0000000 --- a/komodo/.gitignore +++ /dev/null @@ -1,13 +0,0 @@ -target -node_modules -dist -.env -.env.development -.DS_Store -.idea - -/frontend/build -/lib/ts_client/build - -creds.toml -.dev diff --git a/komodo/compose.env b/komodo/compose.env deleted file mode 100644 index 220e865..0000000 --- a/komodo/compose.env +++ /dev/null @@ -1,138 +0,0 @@ -#################################### -# 🦎 KOMODO COMPOSE - VARIABLES 🦎 # -#################################### - -## These compose variables can be used with all Komodo deployment options. -## Pass these variables to the compose up command using `--env-file komodo/compose.env`. -## Additionally, they are passed to both Komodo Core and Komodo Periphery with `env_file: ./compose.env`, -## so you can pass any additional environment variables to Core / Periphery directly in this file as well. - -## Stick to a specific version, or use `latest` -COMPOSE_KOMODO_IMAGE_TAG=latest - -## Note: 🚨 Podman does NOT support local logging driver 🚨. See Podman options here: -## `https://docs.podman.io/en/v4.6.1/markdown/podman-run.1.html#log-driver-driver` -COMPOSE_LOGGING_DRIVER=local # Enable log rotation with the local driver. - -## DB credentials - Ignored for Sqlite -KOMODO_DB_USERNAME=admin -KOMODO_DB_PASSWORD=admin - -## Configure a secure passkey to authenticate between Core / Periphery. -KOMODO_PASSKEY=a6YKCQi5%fpYcKc8 - -## Set your time zone for schedules -## https://en.wikipedia.org/wiki/List_of_tz_database_time_zones -TZ=Europe/Paris - -#=-------------------------=# -#= Komodo Core Environment =# -#=-------------------------=# - -## Full variable list + descriptions are available here: -## 🦎 https://github.com/moghtech/komodo/blob/main/config/core.config.toml 🦎 - -## Note. Secret variables also support `${VARIABLE}_FILE` syntax to pass docker compose secrets. 
-## Docs: https://docs.docker.com/compose/how-tos/use-secrets/#examples - -## Used for Oauth / Webhook url suggestion / Caddy reverse proxy. -KOMODO_HOST=https://demo.komo.do -## Displayed in the browser tab. -KOMODO_TITLE=Komodo -## Create a server matching this address as the "first server". -## Use `https://host.docker.internal:8120` when using systemd-managed Periphery. -KOMODO_FIRST_SERVER=https://periphery:8120 -## Make all buttons just double-click, rather than the full confirmation dialog. -KOMODO_DISABLE_CONFIRM_DIALOG=false - -## Rate Komodo polls your servers for -## status / container status / system stats / alerting. -## Options: 1-sec, 5-sec, 15-sec, 1-min, 5-min. -## Default: 15-sec -KOMODO_MONITORING_INTERVAL="15-sec" -## Rate Komodo polls Resources for updates, -## like outdated commit hash. -## Options: 1-min, 5-min, 15-min, 30-min, 1-hr. -## Default: 5-min -KOMODO_RESOURCE_POLL_INTERVAL="5-min" - -## Used to auth incoming webhooks. Alt: KOMODO_WEBHOOK_SECRET_FILE -KOMODO_WEBHOOK_SECRET=a_random_secret -## Used to generate jwt. Alt: KOMODO_JWT_SECRET_FILE -KOMODO_JWT_SECRET=a6YKCQi5%fpYcKc8 - -## Enable login with username + password. -KOMODO_LOCAL_AUTH=true -## Disable new user signups. -KOMODO_DISABLE_USER_REGISTRATION=false -## All new logins are auto enabled -KOMODO_ENABLE_NEW_USERS=false -## Disable non-admins from creating new resources. -KOMODO_DISABLE_NON_ADMIN_CREATE=false -## Allows all users to have Read level access to all resources. -KOMODO_TRANSPARENT_MODE=false - -## Time to live for jwt tokens. -## Options: 1-hr, 12-hr, 1-day, 3-day, 1-wk, 2-wk -KOMODO_JWT_TTL="3-day" - -## OIDC Login -KOMODO_OIDC_ENABLED=false -## Must reachable from Komodo Core container -# KOMODO_OIDC_PROVIDER=https://oidc.provider.internal/application/o/komodo -## Change the host to one reachable be reachable by users (optional if it is the same as above). -## DO NOT include the `path` part of the URL. 
-# KOMODO_OIDC_REDIRECT_HOST=https://oidc.provider.external -## Your OIDC client id -# KOMODO_OIDC_CLIENT_ID= # Alt: KOMODO_OIDC_CLIENT_ID_FILE -## Your OIDC client secret. -## If your provider supports PKCE flow, this can be ommitted. -# KOMODO_OIDC_CLIENT_SECRET= # Alt: KOMODO_OIDC_CLIENT_SECRET_FILE -## Make usernames the full email. -## Note. This does not work for all OIDC providers. -# KOMODO_OIDC_USE_FULL_EMAIL=true -## Add additional trusted audiences for token claims verification. -## Supports comma separated list, and passing with _FILE (for compose secrets). -# KOMODO_OIDC_ADDITIONAL_AUDIENCES=abc,123 # Alt: KOMODO_OIDC_ADDITIONAL_AUDIENCES_FILE - -## Github Oauth -KOMODO_GITHUB_OAUTH_ENABLED=false -# KOMODO_GITHUB_OAUTH_ID= # Alt: KOMODO_GITHUB_OAUTH_ID_FILE -# KOMODO_GITHUB_OAUTH_SECRET= # Alt: KOMODO_GITHUB_OAUTH_SECRET_FILE - -## Google Oauth -KOMODO_GOOGLE_OAUTH_ENABLED=false -# KOMODO_GOOGLE_OAUTH_ID= # Alt: KOMODO_GOOGLE_OAUTH_ID_FILE -# KOMODO_GOOGLE_OAUTH_SECRET= # Alt: KOMODO_GOOGLE_OAUTH_SECRET_FILE - -## Aws - Used to launch Builder instances and ServerTemplate instances. -KOMODO_AWS_ACCESS_KEY_ID= # Alt: KOMODO_AWS_ACCESS_KEY_ID_FILE -KOMODO_AWS_SECRET_ACCESS_KEY= # Alt: KOMODO_AWS_SECRET_ACCESS_KEY_FILE - -## Hetzner - Used to launch ServerTemplate instances -## Hetzner Builder not supported due to Hetzner pay-by-the-hour pricing model -KOMODO_HETZNER_TOKEN= # Alt: KOMODO_HETZNER_TOKEN_FILE - -#=------------------------------=# -#= Komodo Periphery Environment =# -#=------------------------------=# - -## Full variable list + descriptions are available here: -## 🦎 https://github.com/moghtech/komodo/blob/main/config/periphery.config.toml 🦎 - -## Periphery passkeys must include KOMODO_PASSKEY to authenticate. -PERIPHERY_PASSKEYS=${KOMODO_PASSKEY} - -## Specify the root directory used by Periphery agent. -PERIPHERY_ROOT_DIRECTORY=/etc/komodo - -## Enable SSL using self signed certificates. -## Connect to Periphery at https://address:8120. 
-PERIPHERY_SSL_ENABLED=true - -## If the disk size is overreporting, can use one of these to -## whitelist / blacklist the disks to filter them, whichever is easier. -## Accepts comma separated list of paths. -## Usually whitelisting just /etc/hostname gives correct size. -PERIPHERY_INCLUDE_DISK_MOUNTS=/etc/hostname -# PERIPHERY_EXCLUDE_DISK_MOUNTS=/snap,/etc/repos diff --git a/komodo/mongo.compose.yaml b/komodo/mongo.compose.yaml deleted file mode 100644 index 8fd74bb..0000000 --- a/komodo/mongo.compose.yaml +++ /dev/null @@ -1,84 +0,0 @@ -################################ -# 🦎 KOMODO COMPOSE - MONGO 🦎 # -################################ - -## This compose file will deploy: -## 1. MongoDB -## 2. Komodo Core -## 3. Komodo Periphery - -services: - mongo: - image: mongo - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - command: --quiet --wiredTigerCacheSizeGB 0.25 - restart: unless-stopped - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - # ports: - # - 27017:27017 - env_file: ./compose.env - volumes: - - mongo-data:/data/db - - mongo-config:/data/configdb - environment: - MONGO_INITDB_ROOT_USERNAME: ${KOMODO_DB_USERNAME:-admin} - MONGO_INITDB_ROOT_PASSWORD: ${KOMODO_DB_PASSWORD:-j6QZCeVcA9tQHND4} - - core: - image: ghcr.io/moghtech/komodo-core:${COMPOSE_KOMODO_IMAGE_TAG:-latest} - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - depends_on: - - mongo - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - ports: - - 9120:9120 - env_file: ./compose.env - environment: - KOMODO_DATABASE_ADDRESS: mongo:27017 - KOMODO_DATABASE_USERNAME: ${KOMODO_DB_USERNAME:-admin} - KOMODO_DATABASE_PASSWORD: ${KOMODO_DB_PASSWORD:-j6QZCeVcA9tQHND4} - volumes: - ## Core cache for repos for latest commit hash / contents - - repo-cache:/repo-cache - ## Store sync files on server - # - /path/to/syncs:/syncs - ## Optionally mount a custom core.config.toml - # - 
/path/to/core.config.toml:/config/config.toml - ## Allows for systemd Periphery connection at - ## "http://host.docker.internal:8120" - # extra_hosts: - # - host.docker.internal:host-gateway - - ## Deploy Periphery container using this block, - ## or deploy the Periphery binary with systemd using - ## https://github.com/moghtech/komodo/tree/main/scripts - periphery: - image: ghcr.io/moghtech/komodo-periphery:${COMPOSE_KOMODO_IMAGE_TAG:-latest} - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - env_file: ./compose.env - volumes: - ## Mount external docker socket - - /var/run/docker.sock:/var/run/docker.sock - ## Allow Periphery to see processes outside of container - - /proc:/proc - ## Specify the Periphery agent root directory. - ## Must be the same inside and outside the container, - ## or docker will get confused. See https://github.com/moghtech/komodo/discussions/180. - ## Default: /etc/komodo. - - ${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo}:${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo} - -volumes: - # Mongo - mongo-data: - mongo-config: - # Core - repo-cache: diff --git a/komodo/periphery.compose.yaml b/komodo/periphery.compose.yaml deleted file mode 100644 index 784abef..0000000 --- a/komodo/periphery.compose.yaml +++ /dev/null @@ -1,43 +0,0 @@ -#################################### -# 🦎 KOMODO COMPOSE - PERIPHERY 🦎 # -#################################### - -## This compose file will deploy: -## 1. 
Komodo Periphery - -services: - periphery: - container_name: komodo-periphery - image: ghcr.io/moghtech/komodo-periphery:${COMPOSE_KOMODO_IMAGE_TAG:-latest} - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - ## https://komo.do/docs/connect-servers#configuration - environment: - PERIPHERY_ROOT_DIRECTORY: ${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo} - ## Pass the same passkey as used by the Komodo Core connecting to this Periphery agent. - PERIPHERY_PASSKEYS: abc123 - ## If the disk size is overreporting, can use one of these to - ## whitelist / blacklist the disks to filter them, whichever is easier. - ## Accepts comma separated list of paths. - ## Usually whitelisting just /etc/hostname gives correct size for single root disk. - PERIPHERY_INCLUDE_DISK_MOUNTS: /etc/hostname - # PERIPHERY_EXCLUDE_DISK_MOUNTS: /snap,/etc/repos - volumes: - ## Mount external docker socket - - /var/run/docker.sock:/var/run/docker.sock - ## Allow Periphery to see processes outside of container - - /proc:/proc - ## Specify the Periphery agent root directory. - ## Must be the same inside and outside the container, - ## or docker will get confused. See https://github.com/moghtech/komodo/discussions/180. - ## Default: /etc/komodo. - - ${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo}:${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo} - ## If periphery is being run remote from the core server, ports need - ## to be exposed on the host. - ports: - - 8120:8120 - ## If you want to use a custom periphery config file, use command to pass it to periphery. 
- # command: periphery --config-path ${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo}/periphery.config.toml diff --git a/komodo/postgres.compose.yaml b/komodo/postgres.compose.yaml deleted file mode 100644 index 70cb4a1..0000000 --- a/komodo/postgres.compose.yaml +++ /dev/null @@ -1,97 +0,0 @@ -################################### -# 🦎 KOMODO COMPOSE - POSTGRES 🦎 # -################################### - -## This compose file will deploy: -## 1. Postgres + FerretDB Mongo adapter (https://www.ferretdb.com) -## 2. Komodo Core -## 3. Komodo Periphery - -services: - postgres: - container_name: komodo-db - image: postgres:17 - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - # ports: - # - 5432:5432 - volumes: - - pg-data:/var/lib/postgresql/data - environment: - - POSTGRES_USER=${KOMODO_DB_USERNAME} - - POSTGRES_PASSWORD=${KOMODO_DB_PASSWORD} - - POSTGRES_DB=${KOMODO_DATABASE_DB_NAME:-komodo} - - ferretdb: - container_name: komodo-ferretdb - image: ghcr.io/ferretdb/ferretdb:1 - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - depends_on: - - postgres - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - # ports: - # - 27017:27017 - environment: - - FERRETDB_POSTGRESQL_URL=postgres://postgres:5432/${KOMODO_DATABASE_DB_NAME:-komodo} - - core: - container_name: komodo-core - image: ghcr.io/moghtech/komodo-core:${COMPOSE_KOMODO_IMAGE_TAG:-latest} - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - depends_on: - - ferretdb - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - ports: - - 9120:9120 - env_file: ./compose.env - environment: - KOMODO_DATABASE_URI: mongodb://${KOMODO_DB_USERNAME}:${KOMODO_DB_PASSWORD}@ferretdb:27017/${KOMODO_DATABASE_DB_NAME:-komodo}?authMechanism=PLAIN - volumes: - ## Core cache for repos for latest commit hash / contents - - 
repo-cache:/repo-cache - ## Store sync files on server - # - /path/to/syncs:/syncs - ## Optionally mount a custom core.config.toml - # - /path/to/core.config.toml:/config/config.toml - ## Allows for systemd Periphery connection at - ## "http://host.docker.internal:8120" - # extra_hosts: - # - host.docker.internal:host-gateway - - ## Deploy Periphery container using this block, - ## or deploy the Periphery binary with systemd using - ## https://github.com/moghtech/komodo/tree/main/scripts - periphery: - container_name: komodo-periphery - image: ghcr.io/moghtech/komodo-periphery:${COMPOSE_KOMODO_IMAGE_TAG:-latest} - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - env_file: ./compose.env - volumes: - ## Mount external docker socket - - /var/run/docker.sock:/var/run/docker.sock - ## Allow Periphery to see processes outside of container - - /proc:/proc - ## Specify the Periphery agent root directory. - ## Must be the same inside and outside the container, - ## or docker will get confused. See https://github.com/moghtech/komodo/discussions/180. - ## Default: /etc/komodo. - - ${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo}:${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo} - -volumes: - # Postgres - pg-data: - # Core - repo-cache: diff --git a/komodo/sqlite.compose.yaml b/komodo/sqlite.compose.yaml deleted file mode 100644 index cfdd736..0000000 --- a/komodo/sqlite.compose.yaml +++ /dev/null @@ -1,77 +0,0 @@ -################################# -# 🦎 KOMODO COMPOSE - SQLITE 🦎 # -################################# - -## This compose file will deploy: -## 1. Sqlite + FerretDB Mongo adapter (https://www.ferretdb.com) -## 2. Komodo Core -## 3. 
Komodo Periphery - -services: - ferretdb: - image: ghcr.io/ferretdb/ferretdb:1 - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - # ports: - # - 27017:27017 - volumes: - - sqlite-data:/state - environment: - - FERRETDB_HANDLER=sqlite - - core: - image: ghcr.io/moghtech/komodo-core:${COMPOSE_KOMODO_IMAGE_TAG:-latest} - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - depends_on: - - ferretdb - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - ports: - - 9120:9120 - env_file: ./compose.env - environment: - KOMODO_DATABASE_ADDRESS: ferretdb - volumes: - ## Core cache for repos for latest commit hash / contents - - repo-cache:/repo-cache - ## Store sync files on server - # - /path/to/syncs:/syncs - ## Optionally mount a custom core.config.toml - # - /path/to/core.config.toml:/config/config.toml - ## Allows for systemd Periphery connection at - ## "http://host.docker.internal:8120" - # extra_hosts: - # - host.docker.internal:host-gateway - - ## Deploy Periphery container using this block, - ## or deploy the Periphery binary with systemd using - ## https://github.com/moghtech/komodo/tree/main/scripts - periphery: - image: ghcr.io/moghtech/komodo-periphery:${COMPOSE_KOMODO_IMAGE_TAG:-latest} - labels: - komodo.skip: # Prevent Komodo from stopping with StopAllContainers - restart: unless-stopped - logging: - driver: ${COMPOSE_LOGGING_DRIVER:-local} - env_file: ./compose.env - volumes: - ## Mount external docker socket - - /var/run/docker.sock:/var/run/docker.sock - ## Allow Periphery to see processes outside of container - - /proc:/proc - ## Specify the Periphery agent root directory. - ## Must be the same inside and outside the container, - ## or docker will get confused. See https://github.com/moghtech/komodo/discussions/180. - ## Default: /etc/komodo. 
- - ${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo}:${PERIPHERY_ROOT_DIRECTORY:-/etc/komodo} - -volumes: - # Sqlite - sqlite-data: - # Core - repo-cache: \ No newline at end of file diff --git a/monitoring/alertmanager/config.yml b/monitoring/alertmanager/config.yml new file mode 100644 index 0000000..9e4878b --- /dev/null +++ b/monitoring/alertmanager/config.yml @@ -0,0 +1,18 @@ +global: + resolve_timeout: 5m + +route: + group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'email-notifications' + +receivers: +- name: 'email-notifications' + email_configs: + - to: 'admin@delmar.bzh' + from: 'noreply@delmar.bzh' + smarthost: 'pro1.mail.ovh.net:587' # SMTP submission port (STARTTLS); 58 is not an SMTP port + auth_username: 'admin@delmar.bzh' + auth_password: 'sxS4GA8rBfmFkCFL' diff --git a/monitoring/compose.env b/monitoring/compose.env new file mode 100644 index 0000000..9088cb6 --- /dev/null +++ b/monitoring/compose.env @@ -0,0 +1 @@ +GRAFANA_PASSWORD="XbJ6do@xT8478c" diff --git a/monitoring/docker-compose-node.yaml b/monitoring/docker-compose-node.yaml new file mode 100644 index 0000000..185aa13 --- /dev/null +++ b/monitoring/docker-compose-node.yaml @@ -0,0 +1,96 @@ +# bob (mon.delmar.bzh) +--- +name: monitoring + +volumes: + prometheus_data: {} + grafana_data: {} + +networks: + monitoring: + driver: bridge + +services: + prometheus: + image: prom/prometheus:latest + container_name: prometheus + volumes: + - ./prometheus:/etc/prometheus + - prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + - '--storage.tsdb.retention.time=15d' + - '--storage.tsdb.wal-compression' + - '--web.enable-lifecycle' + ports: + - "9090:9090" + networks: + - monitoring + restart: unless-stopped + + node-exporter: + image: prom/node-exporter:latest + container_name: node-exporter + volumes: + -
/proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)' # "$$" is a literal "$" after Compose interpolation; flag replaces the deprecated ignored-mount-points + ports: + - "9100:9100" + networks: + - monitoring + restart: unless-stopped + + cadvisor: + image: gcr.io/cadvisor/cadvisor:latest + container_name: cadvisor + volumes: + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + - /dev/disk/:/dev/disk:ro + ports: + - "8080:8080" + networks: + - monitoring + restart: unless-stopped + + grafana: + image: grafana/grafana:latest + container_name: grafana + user: "1000:1000" + volumes: + - grafana_data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning + - ./grafana/dashboards:/var/lib/grafana/dashboards + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD} + - GF_USERS_ALLOW_SIGN_UP=false + ports: + - "11000:3000" + networks: + - monitoring + restart: unless-stopped + + alertmanager: + image: prom/alertmanager:latest + container_name: alertmanager + volumes: + - ./alertmanager:/etc/alertmanager + command: + - '--config.file=/etc/alertmanager/config.yml' + - '--storage.path=/alertmanager' + ports: + - "9093:9093" + networks: + - monitoring + restart: unless-stopped diff --git a/monitoring/docker-compose.yaml b/monitoring/docker-compose.yaml new file mode 100644 index 0000000..185aa13 --- /dev/null +++ b/monitoring/docker-compose.yaml @@ -0,0 +1,96 @@ +# bob (mon.delmar.bzh) +--- +name: monitoring + +volumes: + prometheus_data: {} + grafana_data: {} + +networks: + monitoring: + driver: bridge + +services: + prometheus: + image: prom/prometheus:latest + container_name: prometheus + volumes: + - ./prometheus:/etc/prometheus + - prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + -
'--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + - '--storage.tsdb.retention.time=15d' + - '--storage.tsdb.wal-compression' + - '--web.enable-lifecycle' + ports: + - "9090:9090" + networks: + - monitoring + restart: unless-stopped + + node-exporter: + image: prom/node-exporter:latest + container_name: node-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)' # "$$" is a literal "$" after Compose interpolation; flag replaces the deprecated ignored-mount-points + ports: + - "9100:9100" + networks: + - monitoring + restart: unless-stopped + + cadvisor: + image: gcr.io/cadvisor/cadvisor:latest + container_name: cadvisor + volumes: + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + - /dev/disk/:/dev/disk:ro + ports: + - "8080:8080" + networks: + - monitoring + restart: unless-stopped + + grafana: + image: grafana/grafana:latest + container_name: grafana + user: "1000:1000" + volumes: + - grafana_data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning + - ./grafana/dashboards:/var/lib/grafana/dashboards + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD} + - GF_USERS_ALLOW_SIGN_UP=false + ports: + - "11000:3000" + networks: + - monitoring + restart: unless-stopped + + alertmanager: + image: prom/alertmanager:latest + container_name: alertmanager + volumes: + - ./alertmanager:/etc/alertmanager + command: + - '--config.file=/etc/alertmanager/config.yml' + - '--storage.path=/alertmanager' + ports: + - "9093:9093" + networks: + - monitoring + restart: unless-stopped diff --git a/monitoring/grafana/dashboards/system-overview.json b/monitoring/grafana/dashboards/system-overview.json new file mode 100644 index 0000000..e94c933 --- /dev/null +++ b/monitoring/grafana/dashboards/system-overview.json @@ -0,0
+1,37 @@ +{ + "title": "System Overview", + "uid": "system-overview", + "version": 1, + "panels": [ + { + "title": "CPU Usage", + "type": "gauge", + "gridPos": {"h": 8, "w": 6, "x": 0, "y": 0}, + "targets": [{"expr": "node:cpu_usage:avg5m"}] + }, + { + "title": "Memory Usage", + "type": "gauge", + "gridPos": {"h": 8, "w": 6, "x": 6, "y": 0}, + "targets": [{"expr": "(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100"}] + }, + { + "title": "Disk Usage", + "type": "gauge", + "gridPos": {"h": 8, "w": 6, "x": 12, "y": 0}, + "targets": [{"expr": "(node_filesystem_size_bytes{mountpoint=\"/\"} - node_filesystem_free_bytes{mountpoint=\"/\"}) / node_filesystem_size_bytes{mountpoint=\"/\"} * 100"}] + }, + { + "title": "Container CPU Usage", + "type": "graph", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}, + "targets": [{"expr": "sum by(name) (rate(container_cpu_usage_seconds_total{name!=\"\"}[5m])) * 100"}] + }, + { + "title": "Container Memory Usage", + "type": "graph", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}, + "targets": [{"expr": "sum by(name) (container_memory_usage_bytes{name!=\"\"})"}] + } + ] +} diff --git a/monitoring/grafana/provisioning/dashboards/dashboards.yml b/monitoring/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 0000000..3858559 --- /dev/null +++ b/monitoring/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: 'Default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards diff --git a/monitoring/grafana/provisioning/datasources/datasource.yml b/monitoring/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000..86fd346 --- /dev/null +++ b/monitoring/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,8 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: 
proxy + url: http://prometheus:9090 + isDefault: true diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 0000000..a3f866f --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,34 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +# Alertmanager configuration +alerting: + alertmanagers: + - static_configs: + - targets: + - alertmanager:9093 + +# Load rules once and periodically evaluate them +rule_files: + - "rules/*.yml" + +# Scrape configurations (targets use the in-network container ports, not host-published ones) +scrape_configs: + # System metrics change frequently, scrape more often (node-exporter listens on 9100) + - job_name: 'node-exporter' + scrape_interval: 10s + static_configs: + - targets: ['node-exporter:9100'] + + # Container metrics are also volatile (cAdvisor listens on 8080) + - job_name: 'cadvisor' + scrape_interval: 10s + static_configs: + - targets: ['cadvisor:8080'] + + # Prometheus itself changes slowly, scrape less frequently (listens on 9090) + - job_name: 'prometheus' + scrape_interval: 30s + static_configs: + - targets: ['localhost:9090'] diff --git a/monitoring/prometheus/rules/container_alerts.yml b/monitoring/prometheus/rules/container_alerts.yml new file mode 100644 index 0000000..0d44289 --- /dev/null +++ b/monitoring/prometheus/rules/container_alerts.yml @@ -0,0 +1,29 @@ +groups: +- name: container_alerts + rules: + - alert: ContainerRestarting + expr: delta(container_start_time_seconds{name!=""}[15m]) > 0 + for: 5m + labels: + severity: warning + annotations: + summary: "Container restarting ({{ $labels.name }})" + description: "Container {{ $labels.name }} has restarted in the last 15 minutes" + + - alert: ContainerHighMemoryUsage + expr: (container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0) * 100) > 80 # "> 0" drops containers without a memory limit (limit 0 would divide to +Inf) + for: 5m + labels: + severity: warning + annotations: + summary: "Container high memory usage ({{ $labels.name }})" + description: "Container {{ $labels.name }} memory usage is {{ $value }}%" + + - alert: ContainerCPUThrottling + expr:
rate(container_cpu_cfs_throttled_periods_total{name!=""}[5m]) / rate(container_cpu_cfs_periods_total{name!=""}[5m]) > 0.25 + for: 5m + labels: + severity: warning + annotations: + summary: "Container CPU throttling ({{ $labels.name }})" + description: "Container {{ $labels.name }} is being throttled {{ $value | humanizePercentage }}" diff --git a/monitoring/prometheus/rules/node_alerts.yml b/monitoring/prometheus/rules/node_alerts.yml new file mode 100644 index 0000000..efe10c6 --- /dev/null +++ b/monitoring/prometheus/rules/node_alerts.yml @@ -0,0 +1,38 @@ +groups: +- name: node_alerts + rules: + - alert: HighCPULoad + expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80 # rate() averages the whole 5m window; irate() only looks at the last two samples + for: 5m + labels: + severity: warning + annotations: + summary: "High CPU load (instance {{ $labels.instance }})" + description: "CPU load is > 80%\n VALUE = {{ $value }}%\n LABELS: {{ $labels }}" + + - alert: HighMemoryLoad + expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "High memory load (instance {{ $labels.instance }})" + description: "Memory load is > 80%\n VALUE = {{ $value }}%\n LABELS: {{ $labels }}" + + - alert: HighDiskUsage + expr: (node_filesystem_size_bytes{fstype=~"ext4|xfs"} - node_filesystem_free_bytes{fstype=~"ext4|xfs"}) / node_filesystem_size_bytes{fstype=~"ext4|xfs"} * 100 > 85 + for: 5m + labels: + severity: warning + annotations: + summary: "High disk usage (instance {{ $labels.instance }})" + description: "Disk usage is > 85%\n VALUE = {{ $value }}%\n LABELS: {{ $labels }}" + + - alert: UnusualMemoryGrowth + expr: deriv(node_memory_MemAvailable_bytes[30m]) < -10 * 1024 * 1024 / 60 # deriv() is in bytes per second; 10 MiB per minute is 10*1024*1024/60 B/s + for: 10m + labels: + severity: warning + annotations: + summary: "Unusual memory consumption rate (instance {{ $labels.instance }})" + description: "Memory is being consumed at a rate of more than 10MB/min\n VALUE = {{ $value | humanize
}}B/s" diff --git a/monitoring/prometheus/rules/recording_rules.yml b/monitoring/prometheus/rules/recording_rules.yml new file mode 100644 index 0000000..5e7fa18 --- /dev/null +++ b/monitoring/prometheus/rules/recording_rules.yml @@ -0,0 +1,12 @@ +groups: +- name: recording_rules + interval: 1m + rules: + - record: node:cpu_usage:avg5m + expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) # rate(), not irate(): the series name promises a 5m average + + - record: node:memory_usage:percent + expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 + + - record: container:cpu_usage:avg5m + expr: sum by(name) (rate(container_cpu_usage_seconds_total{name!=""}[5m])) * 100 diff --git a/odoo/docker-compose.yaml b/odoo/docker-compose.yaml new file mode 100644 index 0000000..953dc4c --- /dev/null +++ b/odoo/docker-compose.yaml @@ -0,0 +1,45 @@ +# sheldon +--- +name: odoo + +volumes: + odoo-data: + odoo-config: + odoo-addons: + postgres_data: + +services: + app: + image: odoo + container_name: odoo-app + depends_on: + - db + ports: + - "15069:8069" + environment: + - HOST=db + - USER=${ODOO_USER:-admin} + - PASSWORD=${ODOO_PASSWORD:-puW6KHfe3viQRyR7} + volumes: + - odoo-data:/var/lib/odoo + - odoo-config:/etc/odoo + - odoo-addons:/mnt/extra-addons + restart: unless-stopped + + db: + image: postgres + container_name: odoo-postgres + environment: + TZ: Europe/Paris + POSTGRES_USER: ${POSTGRES_USER:-admin} + POSTGRES_DB: ${POSTGRES_DB:-postgres} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-puW6KHfe3viQRyR7} + PGDATA: /var/lib/postgresql/data/pgdata + volumes: + - postgres_data:/var/lib/postgresql/data/pgdata + restart: always + healthcheck: + test: [ "CMD-SHELL", "pg_isready -d $${POSTGRES_DB} -U $${POSTGRES_USER}" ] + interval: 10s + timeout: 5s + retries: 5