jobs/monitoring: Add Prometheus

This change adds the Prometheus job. I had to make certain assumptions
regarding the host constraint and the lack of volumes, but I hope
this serves as a base for running the monitoring stack on Nomad
itself.

The Prometheus instance is intentionally left unexposed, so that access can
later go through Grafana, with additional authentication handled in that layer.
This commit is contained in:
Wojtek Bednarzak 2023-03-22 23:13:39 +00:00
parent 5d75e7287e
commit 6683ba6283
No known key found for this signature in database
GPG key ID: 0A3DE4DE31605343
2 changed files with 88 additions and 0 deletions

View file

@ -0,0 +1,5 @@
# Monitoring Nomad Jobs
This directory contains the Nomad jobs for the monitoring stack.
The `monitoring` namespace must be created before deploying the jobs
(for example, with `nomad namespace apply monitoring`).

View file

@ -0,0 +1,83 @@
# Prometheus server for the monitoring stack.
#
# Runs a single Prometheus instance in the "monitoring" namespace and
# discovers its scrape targets through the Consul agent on the node it is
# scheduled on. No ingress/routing tags are attached to the service, so the
# instance is only reachable on the Nomad-allocated host port.
job "prometheus" {
  datacenters = ["aperature"]
  namespace   = "monitoring"

  group "prometheus" {
    network {
      # Dynamic host port forwarded to 9090 inside the container.
      port "http" {
        to = 9090
      }
    }

    # Pin the allocation to a single, known host.
    constraint {
      attribute = "${attr.unique.hostname}"
      value     = "wheatley"
    }

    # Registers the instance for service discovery under the "http" port.
    service {
      name = "prometheus"
      port = "http"
    }

    task "prometheus" {
      driver = "docker"

      config {
        image = "quay.io/prometheus/prometheus:v2.43.0"

        # Publish the group-level "http" port into the container. Without
        # this the Docker driver maps nothing, and the registered service
        # advertises a host port that no process is listening on.
        ports = ["http"]

        # `$$` escapes HCL interpolation so the literal ${NOMAD_TASK_DIR}
        # is handed to Nomad, which resolves it when the task starts. The
        # template below renders the configuration into that directory.
        args = [
          "--config.file=$${NOMAD_TASK_DIR}/prometheus.yml",
          "--log.level=info",
          "--storage.tsdb.retention.time=90d",
          "--storage.tsdb.path=/prometheus",
          "--web.console.libraries=/usr/share/prometheus/console_libraries",
          "--web.console.templates=/usr/share/prometheus/consoles"
        ]
      }

      # TODO: Add volumes for persistent storage, configured to be
      # /prometheus on the container.

      # Prometheus configuration rendered into the task's local directory.
      #   * nomad_metrics       - scrapes Nomad agents registered in Consul
      #                           via /v1/metrics?format=prometheus.
      #   * application_metrics - scrapes every other Consul service unless
      #                           it carries the tag prometheus.io/scrape=false.
      # The heredoc body starts at column 0 because a plain `<<EOF` heredoc
      # keeps leading whitespace verbatim in the rendered file.
      template {
        destination = "local/prometheus.yml"
        data        = <<EOF
global:
  scrape_interval: 10s
  evaluation_interval: 10s
scrape_configs:
  - job_name: 'nomad_metrics'
    consul_sd_configs:
      - server: '{{ env "attr.unique.network.ip-address" }}:8500'
        services: ['nomad-client', 'nomad']
    relabel_configs:
      - source_labels: ['__meta_consul_tags']
        regex: '(.*)http(.*)'
        action: keep
      - source_labels: ['__meta_consul_node']
        target_label: 'node'
      # If nomad is available on multiple IPs, drop the ones which are not scrapable
      - source_labels: ['__address__']
        regex: '172(.*)'
        action: drop
    metrics_path: /v1/metrics
    params:
      format: ['prometheus']
  - job_name: 'application_metrics'
    consul_sd_configs:
      - server: '{{ env "attr.unique.network.ip-address" }}:8500'
    relabel_configs:
      - source_labels: ['__meta_consul_service']
        regex: 'nomad|nomad-client|consul'
        action: drop
      # Drop services which do not want to be scraped.
      # Typically used when a job does not expose prometheus metrics.
      - source_labels: ['__meta_consul_tags']
        regex: '(.*)prometheus.io/scrape=false(.*)'
        action: 'drop'
      - source_labels: ['__meta_consul_node']
        target_label: 'node'
      - source_labels: ['__meta_consul_service']
        target_label: 'service'
EOF
      }
    }
  }
}