class FilterModule:
    """Ansible filter plugin that turns inventory data into Gatus config entries.

    The filters return plain lists of dicts which the gatus role places under
    the ``endpoints`` / ``external-endpoints`` keys of the Gatus configuration.
    """

    # Alert channels attached to every generated endpoint.
    default_alerts = [
        {
            "type": "ntfy",
            "send-on-resolved": True,
        },
        {
            "type": "email",
            "send-on-resolved": True,
        },
    ]

    # Conditions used when a service defines none, and substituted for a
    # leading "DEFAULT" marker in a service's own condition list.
    default_conditions = [
        "[STATUS] == any(200, 204)",
    ]

    def filters(self):
        """Return the mapping of filter names to their implementations."""
        return {
            'hosts_to_gatus': self.hosts_to_gatus,
            'vault_hosts_backup_to_gatus': self.vault_hosts_backup_to_gatus,
            'services_to_gatus': self.services_to_gatus,
        }

    def hosts_to_gatus(self, hosts):
        """Build one ICMP endpoint per host.

        Args:
            hosts: Mapping whose values are dicts with ``hostname`` and
                ``fqdn`` keys. The mapping keys are not used.

        Returns:
            A list of endpoint dicts in the ``1-hosts`` group.
        """
        return [
            {
                "name": host["hostname"],
                "url": f"icmp://{host['fqdn']}",
                "group": "1-hosts",
                "conditions": [
                    "[CONNECTED] == true",
                ],
                "alerts": self.default_alerts,
            }
            for host in hosts.values()
        ]

    def vault_hosts_backup_to_gatus(self, hosts):
        """Build one token-authenticated backup entry per host.

        Args:
            hosts: Mapping of host name to a dict that contains
                ``backup.gatus_token``.

        Returns:
            A list of entries named ``backup@<host>`` in the ``8-backups``
            group, each carrying that host's token.
        """
        return [
            {
                "name": f"backup@{name}",
                "group": "8-backups",
                "token": host_data["backup"]["gatus_token"],
                "alerts": self.default_alerts,
            }
            for name, host_data in hosts.items()
        ]

    @staticmethod
    def _service_url(service, mon):
        """Resolve the URL to probe for *service*.

        The base URL is derived from the first DNS record; a target of ``@``
        means the bare domain. A monitoring ``url`` starting with ``/`` is
        appended to that base, any other value replaces it entirely.

        Raises:
            ValueError: If neither the DNS records nor the monitoring block
                yield a complete URL.
        """
        base = None
        if dns_records := service.get("dns"):
            dns = dns_records[0]
            target = dns["target"]
            domain = dns["domain"]
            # Inner quotes must differ from the f-string's own quotes so the
            # module still parses on Python versions before 3.12.
            host = domain if target == "@" else f"{target}.{domain}"
            base = f"https://{host}"

        mon_url = mon.get("url")
        if mon_url and not mon_url.startswith("/"):
            return mon_url

        if base is None:
            # Without this check the URL of the previously processed service
            # would leak into this endpoint.
            raise ValueError(
                f"service {service.get('name')!r} has monitoring enabled but "
                "neither a dns entry nor an absolute monitoring url"
            )

        return base + mon_url if mon_url else base

    def _service_conditions(self, mon):
        """Return a fresh condition list for a monitoring block.

        A leading ``"DEFAULT"`` entry is replaced by the default conditions.
        The caller's list is never modified.
        """
        conditions = mon.get("conditions")
        if not conditions:
            return [*self.default_conditions]
        if conditions[0] == "DEFAULT":
            return [*self.default_conditions, *conditions[1:]]
        return [*conditions]

    def services_to_gatus(self, services):
        """Build one HTTP endpoint per service that has a ``monitoring`` block.

        Args:
            services: Iterable of service dicts. Services without a truthy
                ``monitoring`` entry are skipped.

        Returns:
            A list of endpoint dicts with ``name``, ``group``, ``url``,
            ``conditions`` and ``alerts`` keys.

        Raises:
            ValueError: If a monitored service provides no usable URL.
        """
        result = []

        for service in services:
            mon = service.get("monitoring")
            if not mon:
                continue

            result.append({
                "name": service["name"],
                "group": mon.get("group"),
                "url": self._service_url(service, mon),
                "conditions": self._service_conditions(mon),
                "alerts": self.default_alerts,
            })

        return result
docker_force_recreate: --force-recreate + when: cmd_result.changed # noqa: no-handler We need to handle the restart per service. Handlers don't support variables. + + - name: Import start tasks for common service + ansible.builtin.import_tasks: tasks/start-common-service.yml diff --git a/roles/gatus/vars/main.yml b/roles/gatus/vars/main.yml new file mode 100644 index 0000000..d98a723 --- /dev/null +++ b/roles/gatus/vars/main.yml @@ -0,0 +1,69 @@ +--- +gatus_svc: + domain: status.serguzim.me + name: gatus + port: 8080 + +gatus_external_endpoints_backups: "{{ vault_hosts | vault_hosts_backup_to_gatus() }}" + +gatus_endpoints_hosts: "{{ opentofu.hosts | hosts_to_gatus() }}" +gatus_endpoints_services: "{{ all_services | services_to_gatus() }}" + +gatus_endpoints_other: + - name: matrix-federation + url: https://federationtester.matrix.org/api/report?server_name=msrg.cc + group: 9-external + interval: 5m + conditions: + - '[STATUS] == 200' + - '[BODY].FederationOK == true' + ui: + hide-url: true + alerts: + - type: ntfy + send-on-resolved: true + - type: email + send-on-resolved: true + - name: healthchecks-io + url: "{{ opentofu.healthchecksio.status.ping_url }}" + group: 9-external + interval: 5m + conditions: + - '[STATUS] == 200' + ui: + hide-url: true + +gatus_yml: + storage: + type: sqlite + path: /data/data.db + + connectivity: + checker: + target: 1.1.1.1:53 + interval: 60s + + security: + oidc: + issuer-url: "{{ opentofu.authentik_data.gatus.base_url }}" + redirect-url: "https://{{ gatus_svc.domain }}/authorization-code/callback" + client-id: "{{ opentofu.authentik_data.gatus.client_id }}" + client-secret: "{{ opentofu.authentik_data.gatus.client_secret }}" + scopes: ["openid"] + + alerting: + email: "{{ vault_gatus.alerting.email }}" + ntfy: "{{ vault_gatus.alerting.ntfy }}" + + external-endpoints: "{{ gatus_external_endpoints_backups }}" + endpoints: "{{ gatus_endpoints_hosts | union(gatus_endpoints_services) | union(gatus_endpoints_other) }}" + 
+gatus_compose: + watchtower: true + image: twinproduction/gatus + volumes: + - ./config.yaml:/config/config.yaml + - data:/data + file: + volumes: + data: diff --git a/roles/healthcheck/files/data/http b/roles/healthcheck/files/data/http index 30abb45..7f346b7 100755 --- a/roles/healthcheck/files/data/http +++ b/roles/healthcheck/files/data/http @@ -38,7 +38,7 @@ check_url "mail.serguzim.me" #check_url "msrg.cc" # disabled because it keeps creating false alerts check_url "rss.serguzim.me" #check_url "serguzim.me" # disabled because it keeps creating false alerts -check_url "status.serguzim.me" "/status/serguzim-net" +#check_url "status.serguzim.me" "/status/serguzim-net" check_url "tick.serguzim.me" check_url "wiki.serguzim.me" check_url "www.reitanlage-oranienburg.de" diff --git a/roles/uptime_kuma/tasks/main.yml b/roles/uptime_kuma/tasks/main.yml deleted file mode 100644 index d0e8e13..0000000 --- a/roles/uptime_kuma/tasks/main.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -- name: Set common facts - ansible.builtin.import_tasks: tasks/set-default-facts.yml - -- name: Deploy {{ svc.name }} - vars: - svc: "{{ uptime_kuma_svc }}" - env: "{{ uptime_kuma_env }}" - compose: "{{ uptime_kuma_compose }}" - block: - - name: Import tasks to deploy common service - ansible.builtin.import_tasks: tasks/deploy-common-service.yml diff --git a/roles/uptime_kuma/vars/main.yml b/roles/uptime_kuma/vars/main.yml deleted file mode 100644 index 093005b..0000000 --- a/roles/uptime_kuma/vars/main.yml +++ /dev/null @@ -1,16 +0,0 @@ ---- -uptime_kuma_svc: - domain: status.serguzim.me - additional_domains: - - status.serguzim.net - name: uptime-kuma - port: 3001 - -uptime_kuma_compose: - watchtower: true - image: louislam/uptime-kuma:1 - volumes: - - data:/app/data - file: - volumes: - data: diff --git a/services.auto.tfvars b/services.auto.tfvars index bf58dcb..1df2338 100644 --- a/services.auto.tfvars +++ b/services.auto.tfvars @@ -6,6 +6,10 @@ services = { domain = "serguzim.me" target = 
"acme" }] + monitoring = { + url = "/health" + group = "7-support" + } auth = false database = true s3 = false @@ -18,6 +22,10 @@ services = { domain = "serguzim.me" target = "auth" }] + monitoring = { + url = "/-/health/live/" + group = "4-services" + } auth = false database = true s3 = false @@ -50,6 +58,10 @@ services = { alias = "faas" } ] + monitoring = { + url = "/healthz" + group = "7-support" + } auth = false database = false s3 = false @@ -66,6 +78,10 @@ services = { name = "forgejo_data" type = "docker" }] + monitoring = { + url = "/api/v1/version" + group = "4-services" + } auth = true auth_redirects = ["https://git.serguzim.me/user/oauth2/auth.serguzim.me/callback"] database = true @@ -88,6 +104,23 @@ services = { s3 = false }, + "gatus" = { + name = "gatus" + host = "node003" + dns = [{ + domain = "serguzim.me" + target = "status" + }] + backup = [{ + name = "gatus_data" + type = "docker" + }] + auth = true + auth_redirects = ["https://status.serguzim.me/authorization-code/callback"] + database = false + s3 = false + }, + "homebox" = { name = "homebox" host = "node002" @@ -99,6 +132,14 @@ services = { name = "homebox_data" type = "docker" }] + monitoring = { + url = "/api/v1/status" + group = "4-services" + conditions = [ + "DEFAULT", + "[BODY].health == true" + ] + } auth = false database = false s3 = false @@ -121,6 +162,9 @@ services = { type = "hook" } ] + monitoring = { + group = "4-services" + } auth = true auth_redirects = ["https://gallery.serguzim.me/auth/login"] database = false @@ -138,6 +182,14 @@ services = { name = "influxdb_data" type = "docker" }] + monitoring = { + url = "/health" + group = "4-services" + conditions = [ + "DEFAULT", + "[BODY].status == pass" + ] + } auth = false database = false s3 = false @@ -154,6 +206,14 @@ services = { name = "jellyfin_config" type = "docker" }] # TODO add jellyfin_media + monitoring = { + url = "/health" + group = "4-services" + conditions = [ + "DEFAULT", + "[BODY] == Healthy" + ] + } auth = 
false database = false s3 = false @@ -166,6 +226,10 @@ services = { domain = "serguzim.me" target = "bookmarks" }] + monitoring = { + url = "/api/v1/logins" + group = "4-services" + } auth = true auth_redirects = ["https://bookmarks.serguzim.me/api/v1/auth/callback/authentik"] database = true @@ -183,6 +247,9 @@ services = { name = "mailcow" type = "hook" }] + monitoring = { + group = "4-services" + } auth = false database = false s3 = false @@ -207,6 +274,10 @@ services = { name = "minio_data" type = "docker" }] + monitoring = { + url = "/minio/health/live" + group = "7-support" + } auth = false database = false s3 = false @@ -223,6 +294,14 @@ services = { name = "ntfy_data" type = "docker" }] + monitoring = { + url = "/v1/health" + group = "4-services" + conditions = [ + "DEFAULT", + "[BODY].healthy == true" + ] + } auth = false database = false s3 = false @@ -235,6 +314,7 @@ services = { name = "postgresql" type = "hook" }] + # TODO add monitoring auth = false database = false s3 = false @@ -259,6 +339,9 @@ services = { name = "reitanlage-oranienburg_data" type = "docker" }] + monitoring = { + group = "5-websites" + } auth = false database = false s3 = false @@ -279,6 +362,14 @@ services = { alias = "shlink" } ] + monitoring = { + url = "/rest/health" + group = "4-services" + conditions = [ + "DEFAULT", + "[BODY].status == pass" + ] + } auth = false database = true s3 = false @@ -303,6 +394,10 @@ services = { name = "synapse_media_store" type = "docker" }] + monitoring = { + url = "/_matrix/client/versions" + group = "4-services" + } ports = ["8448:8448"] auth = true auth_redirects = ["https://matrix.serguzim.me/_synapse/client/oidc/callback"] @@ -321,6 +416,10 @@ services = { name = "tandoor_mediafiles" type = "docker" }] + monitoring = { + url = "/accounts/login/" + group = "4-services" + } auth = false database = true s3 = false @@ -357,6 +456,10 @@ services = { domain = "serguzim.me" target = "rss" }] + monitoring = { + url = "/tt-rss/" + group = 
"4-services" + } auth = false database = true s3 = false @@ -369,22 +472,10 @@ services = { domain = "serguzim.me" target = "analytics" }] - auth = false - database = true - s3 = false - }, - - "uptime_kuma" = { - name = "uptime_kuma" - host = "node002" - dns = [{ - domain = "serguzim.me" - target = "status" - }] - backup = [{ - name = "uptime-kuma_data" - type = "docker" - }] + monitoring = { + url = "/api/heartbeat" + group = "4-services" + } auth = false database = true s3 = false @@ -401,6 +492,10 @@ services = { name = "vikunja_data" type = "docker" }] + monitoring = { + url = "/api/v1/info" + group = "4-services" + } auth = true auth_redirects = ["https://todo.serguzim.me/auth/openid/authserguzimme"] database = true @@ -414,6 +509,9 @@ services = { domain = "serguzim.me" target = "hook" }] + monitoring = { + group = "7-support" + } auth = false database = false s3 = false @@ -426,6 +524,9 @@ services = { domain = "serguzim.me" target = "wiki" }] + monitoring = { + group = "4-services" + } auth = true auth_redirects = ["https://wiki.serguzim.me/login/f792bc7d-1a25-4437-944e-55eaf0111102/callback"] database = true @@ -439,6 +540,10 @@ services = { domain = "serguzim.me" target = "ci" }] + monitoring = { + url = "/healthz" + group = "4-services" + } auth = false database = true s3 = false diff --git a/variables.tf b/variables.tf index f40bb29..a6495e1 100644 --- a/variables.tf +++ b/variables.tf @@ -137,6 +137,11 @@ variable "services" { name = string type = string }))) + monitoring = optional(object({ + url = optional(string) + group = optional(string) + conditions = optional(list(string)) + })) ports = optional(list(string)) auth = bool auth_redirects = optional(list(string))