Compare commits

...

6 commits

Author SHA1 Message Date
wizzdom
6a3f7018e0
grafana: add dynamic prometheus datasource 2025-02-28 02:40:16 +00:00
Gavin Holahan
10fbcd5268
service 2025-02-28 02:12:19 +00:00
wizzdom
9cc57a2ed8
traefik: use consul, bump res, add metrics, fix dummy service 2025-02-28 01:25:49 +00:00
Gavin Holahan
814655cc5f
Removed Static, added better prom .yml 2025-02-28 01:25:49 +00:00
wizzdom
0d6200742a
monitoring: add prometheus 2025-02-28 01:25:49 +00:00
wizzdom
13a0675bab
monitoring: add grafana 2025-02-28 01:25:49 +00:00
3 changed files with 195 additions and 3 deletions

View file

@ -51,7 +51,7 @@ job "traefik" {
service {
name = "traefik-http"
provider = "nomad"
provider = "consul"
port = "https"
}
@ -67,6 +67,10 @@ job "traefik" {
"/storage/nomad/traefik/access.log:/access.log",
]
}
resources {
cpu = 500
memory = 1024
}
template {
data = <<EOF
@ -157,7 +161,11 @@ job "traefik" {
storage = "acme.json"
[certificatesResolvers.lets-encrypt.acme.tlsChallenge]
[tracing]
[metrics]
[metrics.prometheus]
addServicesLabels = true
addRoutersLabels = true
addEntryPointsLabels = true
[accessLog]
filePath = "/access.log"
@ -195,7 +203,7 @@ EOF
[http.services]
[http.services.dummy-service.loadBalancer]
[[http.services.dummy-service.loadBalancer.servers]]
url = "http://127.0.0.1" # Dummy service - not used
url = "http://0.0.0.0" # Dummy service - not used
EOF
destination = "local/dynamic.toml"
change_mode = "noop"

110
jobs/monitoring/grafana.hcl Normal file
View file

@ -0,0 +1,110 @@
# Grafana dashboards, backed by a PostgreSQL task that runs in the same group.
#
# Database name, user and password are read from Consul KV under grafana/db/*.
# The Prometheus datasource is provisioned from the Consul service catalog, so
# it follows whatever port the "prometheus" service is currently registered on.
job "grafana" {
  datacenters = ["aperture"]
  type        = "service"

  group "monitoring" {
    network {
      # Grafana's HTTP listener inside the container.
      port "http" {
        to = 3000
      }

      # PostgreSQL listener of the sibling "db" task.
      port "db" {
        to = 5432
      }
    }

    service {
      name = "grafana"
      port = "http"

      check {
        type     = "http"
        path     = "/"
        interval = "10s"
        timeout  = "2s"
      }

      # Routing labels consumed by Traefik.
      tags = [
        "traefik.enable=true",
        "traefik.http.routers.grafana.entrypoints=web,websecure",
        "traefik.http.routers.grafana.rule=Host(`grafana.redbrick.dcu.ie`)",
        "traefik.http.routers.grafana.tls=true",
        "traefik.http.routers.grafana.tls.certresolver=lets-encrypt",
      ]
    }

    task "grafana" {
      driver = "docker"
      user   = "1001:1001"

      env {
        GF_AUTH_BASIC_ENABLED = "true"
        GF_INSTALL_PLUGINS    = "grafana-piechart-panel"
        GF_SERVER_ROOT_URL    = "https://grafana.redbrick.dcu.ie"
      }

      config {
        # NOTE(review): the image is untagged, so every re-pull may bring a new
        # Grafana release - consider pinning a version.
        image = "grafana/grafana"
        ports = ["http"]

        volumes = [
          "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/grafana",
          "local/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml"
        ]
      }

      # Database connection settings, exported to the task as environment
      # variables. NOMAD_ADDR_db is the address of the "db" port declared in the
      # group network block above.
      template {
        data = <<EOH
GF_DATABASE_TYPE=postgres
GF_DATABASE_HOST={{ env "NOMAD_ADDR_db" }}
GF_DATABASE_NAME={{ key "grafana/db/name" }}
GF_DATABASE_USER={{ key "grafana/db/user" }}
GF_DATABASE_PASSWORD={{ key "grafana/db/password" }}
GF_FEATURE_TOGGLES_ENABLE=publicDashboards
GF_LOG_LEVEL=debug
EOH
        destination = "local/.env"
        env         = true
      }

      # Datasource provisioning file, bind-mounted into Grafana's provisioning
      # directory by the volumes list above. One "url" line is rendered per
      # registered instance of the "prometheus" Consul service.
      # NOTE(review): more than one instance would render duplicate "url" keys -
      # confirm the prometheus job stays at a single instance.
      template {
        data = <<EOH
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    {{- range service "prometheus" }}
    url: http://prometheus.service.consul:{{ .Port }}{{ end }}
    isDefault: true
    editable: false
EOH
        destination = "local/datasources.yml"
      }
    }

    task "db" {
      driver = "docker"

      config {
        image = "postgres:17-alpine"
        ports = ["db"]

        volumes = [
          "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/postgresql/data",
        ]
      }

      # Bootstrap credentials for the postgres image. The image reads
      # POSTGRES_DB (not POSTGRES_NAME) for the name of the database to create
      # on first start; it must match GF_DATABASE_NAME in the grafana task.
      template {
        data = <<EOH
POSTGRES_PASSWORD={{ key "grafana/db/password" }}
POSTGRES_USER={{ key "grafana/db/user" }}
POSTGRES_DB={{ key "grafana/db/name" }}
EOH
        destination = "local/db.env"
        env         = true
      }
    }
  }
}

View file

@ -0,0 +1,74 @@
# Prometheus server. Scrape targets are discovered through the Consul catalog:
# Nomad agents for cluster metrics, plus any service that opts in with the
# "prometheus.enable=true" tag.
job "prometheus" {
  datacenters = ["aperture"]
  type        = "service"

  group "prometheus" {
    count = 1

    network {
      port "http" {
        to = 9090
      }
    }

    task "prometheus" {
      driver = "docker"

      service {
        name = "prometheus"
        port = "http"
      }

      config {
        # NOTE(review): ":latest" is unpinned - consider a fixed version tag.
        image = "prom/prometheus:latest"
        ports = ["http"]

        volumes = [
          "local/prometheus.yml:/etc/prometheus/prometheus.yml"
        ]
      }

      # Rendered Prometheus configuration, bind-mounted over the image's
      # default config file by the volumes list above.
      template {
        destination = "local/prometheus.yml"
        data        = <<EOF
global:
  scrape_interval: 10s
  evaluation_interval: 10s
scrape_configs:
  - job_name: 'nomad_metrics'
    consul_sd_configs:
      - server: 'consul.service.consul:8500'
        services: ['nomad-client', 'nomad'] # This allows for Client (Workload) and Server (Orchestration) metrics
        tags: ['http']
    metrics_path: /v1/metrics
    params:
      format: ['prometheus']
  - job_name: 'container-metrics'
    consul_sd_configs:
      - server: 'consul.service.consul:8500'
        tags: ['prometheus.enable=true']
    relabel_configs:
      # Use the Consul service name as the job label. No replacement is set, so
      # the default ($1) copies the matched source value; a literal replacement
      # would give every target the same job label.
      - source_labels: ['__meta_consul_service']
        target_label: 'job'
      # A "prometheus.path=<path>" tag overrides the scrape path for that target.
      - source_labels: ['__meta_consul_tags']
        regex: '.*prometheus.path=([^,]+).*' # Extract path from tag
        target_label: '__metrics_path__'
    # Default path for services without a prometheus.path tag. /v1/metrics is
    # the Nomad agent endpoint and does not apply to ordinary containers.
    metrics_path: /metrics
    # Kept so targets that point prometheus.path at a /v1/...-style endpoint
    # still receive the format query parameter.
    params:
      format: ['prometheus']
EOF
      }

      resources {
        cpu    = 500
        memory = 512
      }
    }
  }
}