From 13a0675babe3605d9b52166eb61f762fc9faddac Mon Sep 17 00:00:00 2001 From: wizzdom Date: Wed, 26 Feb 2025 04:20:49 +0000 Subject: [PATCH 1/6] monitoring: add grafana --- jobs/monitoring/grafana.hcl | 95 +++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 jobs/monitoring/grafana.hcl diff --git a/jobs/monitoring/grafana.hcl b/jobs/monitoring/grafana.hcl new file mode 100644 index 0000000..2d76bc1 --- /dev/null +++ b/jobs/monitoring/grafana.hcl @@ -0,0 +1,95 @@ +job "grafana" { + datacenters = ["aperture"] + + type = "service" + + group "monitoring" { + network { + port "http" { + to = 3000 + } + port "db" { + to = 5432 + } + } + + service { + name = "grafana" + port = "http" + + check { + type = "http" + path = "/" + interval = "10s" + timeout = "2s" + } + + tags = [ + "traefik.enable=true", + "traefik.http.routers.grafana.entrypoints=web,websecure", + "traefik.http.routers.grafana.rule=Host(`grafana.redbrick.dcu.ie`)", + "traefik.http.routers.grafana.tls=true", + "traefik.http.routers.grafana.tls.certresolver=lets-encrypt", + ] + } + + task "grafana" { + driver = "docker" + user = "1001:1001" + + env { + GF_AUTH_BASIC_ENABLED = "true" + GF_INSTALL_PLUGINS = "grafana-piechart-panel" + GF_SERVER_ROOT_URL = "https://grafana.redbrick.dcu.ie" + } + + config { + image = "grafana/grafana" + ports = ["http"] + + volumes = [ + "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/grafana" + ] + } + + + template { + data = < Date: Wed, 26 Feb 2025 04:21:12 +0000 Subject: [PATCH 2/6] monitoring: add prometheus --- jobs/monitoring/prometheus.hcl | 87 ++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 jobs/monitoring/prometheus.hcl diff --git a/jobs/monitoring/prometheus.hcl b/jobs/monitoring/prometheus.hcl new file mode 100644 index 0000000..764673a --- /dev/null +++ b/jobs/monitoring/prometheus.hcl @@ -0,0 +1,87 @@ +job "prometheus" { + datacenters = ["aperture"] + + group "prometheus" { + network { + port "http" { + static = 9090 + } + } + + service { + name = "prometheus" + port = "http" + } + + task "prometheus" { + driver = "docker" + config { + image = "quay.io/prometheus/prometheus" + ports = ["http"] + + volumes = [ + "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/prometheus" + ] + + args = [ + "--config.file=$${NOMAD_TASK_DIR}/prometheus.yml", + "--log.level=info", + "--storage.tsdb.retention.time=90d", + "--storage.tsdb.path=/prometheus", + "--web.console.libraries=/usr/share/prometheus/console_libraries", + "--web.console.templates=/usr/share/prometheus/consoles" + ] + } + + template { + data = < Date: Fri, 28 Feb 2025 01:09:05 +0000 Subject: [PATCH 3/6] Removed Static, added better prom .yml --- jobs/monitoring/prometheus.hcl | 80 ++++++++++++++-------------------- 1 file changed, 32 insertions(+), 48 deletions(-) diff --git a/jobs/monitoring/prometheus.hcl b/jobs/monitoring/prometheus.hcl index 764673a..6ffa3af 100644 --- a/jobs/monitoring/prometheus.hcl +++ b/jobs/monitoring/prometheus.hcl @@ -1,87 +1,71 @@ job "prometheus" { datacenters = ["aperture"] + type = "service" group "prometheus" { + count = 1 + network { port "http" { - static = 9090 + to = 9090 } } - service { - name = "prometheus" - port = "http" - } - task "prometheus" { driver = "docker" + config { - image = "quay.io/prometheus/prometheus" + image = "prom/prometheus:latest" ports = ["http"] volumes = [ - "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/prometheus" - ] - - args = [ - "--config.file=$${NOMAD_TASK_DIR}/prometheus.yml", - "--log.level=info", - "--storage.tsdb.retention.time=90d", - "--storage.tsdb.path=/prometheus", - "--web.console.libraries=/usr/share/prometheus/console_libraries", - "--web.console.templates=/usr/share/prometheus/consoles" + "local/prometheus.yml:/etc/prometheus/prometheus.yml" ] } template { + destination = "local/prometheus.yml" data = < Date: Fri, 28 Feb 2025 01:23:37 +0000 Subject: [PATCH 4/6] traefik: use consul, bump res, add metrics, fix dummy service --- jobs/ingress/traefik.hcl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/jobs/ingress/traefik.hcl b/jobs/ingress/traefik.hcl index 3693f5c..8574520 100644 --- a/jobs/ingress/traefik.hcl +++ b/jobs/ingress/traefik.hcl @@ -51,7 +51,7 @@ job "traefik" { service { name = "traefik-http" - provider = "nomad" + provider = "consul" port = "https" } @@ -67,6 +67,10 @@ job "traefik" { "/storage/nomad/traefik/access.log:/access.log", ] } + resources { + cpu = 500 + memory = 1024 + } template { data = < Date: Fri, 28 Feb 2025 02:12:19 +0000 Subject: [PATCH 5/6] service --- jobs/monitoring/prometheus.hcl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/jobs/monitoring/prometheus.hcl b/jobs/monitoring/prometheus.hcl index 6ffa3af..bd5c882 100644 --- a/jobs/monitoring/prometheus.hcl +++ b/jobs/monitoring/prometheus.hcl @@ -14,6 +14,11 @@ job "prometheus" { task "prometheus" { driver = "docker" + service { + name = "prometheus" + port = "http" + } + config { image = "prom/prometheus:latest" ports = ["http"] @@ -56,8 +61,6 @@ scrape_configs: metrics_path: /v1/metrics params: format: ['prometheus'] - - EOF } From 6a3f7018e0cd90c6f9cdd5495ed4aa1423fbb080 Mon Sep 17 00:00:00 2001 From: wizzdom Date: Fri, 28 Feb 2025 02:40:16 +0000 Subject: [PATCH 6/6] grafana: add dynamic prometheus datasource --- jobs/monitoring/grafana.hcl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/jobs/monitoring/grafana.hcl b/jobs/monitoring/grafana.hcl index 2d76bc1..5a981ae 100644 --- a/jobs/monitoring/grafana.hcl +++ b/jobs/monitoring/grafana.hcl @@ -48,11 +48,11 @@ job "grafana" { ports = ["http"] volumes = [ - "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/grafana" + "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/grafana", + "local/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml" ] } - template { data = <