jobs/monitoring: Add Prometheus
This change adds the Prometheus job. I had to make certain assumptions about the host constraint and the lack of volumes, but I hope this serves as a base for running the monitoring stack on Nomad itself. The Prometheus instance is intentionally left unexposed so that we can later put Grafana in front of it and handle authentication at that layer.
This commit is contained in:
		
							parent
							
								
									5d75e7287e
								
							
						
					
					
						commit
						6683ba6283
					
				
					 2 changed files with 88 additions and 0 deletions
				
			
		
							
								
								
									
										5
									
								
								jobs/monitoring/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								jobs/monitoring/README.md
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,5 @@
 | 
				
			||||||
 | 
					# Monitoring Nomad Jobs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This directory contains the Nomad jobs for the monitoring stack.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `monitoring` namespace must be created before deploying the jobs, e.g. with `nomad namespace apply monitoring`.
 | 
				
			||||||
							
								
								
									
										83
									
								
								jobs/monitoring/prometheus.hcl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								jobs/monitoring/prometheus.hcl
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,83 @@
 | 
				
			||||||
 | 
					job "prometheus" {
 | 
				
			||||||
 | 
					    datacenters = ["aperature"]
 | 
				
			||||||
 | 
					    namespace = "monitoring"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    group "prometheus" {
 | 
				
			||||||
 | 
					        network {
 | 
				
			||||||
 | 
					            port "http" {
 | 
				
			||||||
 | 
					                to = 9090
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        constraint {
 | 
				
			||||||
 | 
					            attribute = "${attr.unique.hostname}"
 | 
				
			||||||
 | 
					            value = "wheatley"
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        service {
 | 
				
			||||||
 | 
					            name = "prometheus"
 | 
				
			||||||
 | 
					            port = "http"
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        task "prometheus" {
 | 
				
			||||||
 | 
					            driver = "docker"
 | 
				
			||||||
 | 
					            config {
 | 
				
			||||||
 | 
					                image = "quay.io/prometheus/prometheus:v2.43.0"
 | 
				
			||||||
 | 
					                args = [
 | 
				
			||||||
 | 
					                    "--config.file=$${NOMAD_TASK_DIR}/prometheus.yml",
 | 
				
			||||||
 | 
					                    "--log.level=info",
 | 
				
			||||||
 | 
					                    "--storage.tsdb.retention.time=90d",
 | 
				
			||||||
 | 
					                    "--storage.tsdb.path=/prometheus",
 | 
				
			||||||
 | 
					                    "--web.console.libraries=/usr/share/prometheus/console_libraries",
 | 
				
			||||||
 | 
					                    "--web.console.templates=/usr/share/prometheus/consoles"
 | 
				
			||||||
 | 
					                ]
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # TODO: Add volumes for persistent storage, configured to be
 | 
				
			||||||
 | 
					            # /prometheus on the container.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            template {
 | 
				
			||||||
 | 
					                destination = "local/prometheus.yml"
 | 
				
			||||||
 | 
					                data = <<EOF
 | 
				
			||||||
 | 
					global:
 | 
				
			||||||
 | 
					  scrape_interval: 10s
 | 
				
			||||||
 | 
					  evaluation_interval: 10s
 | 
				
			||||||
 | 
					scrape_configs:
 | 
				
			||||||
 | 
					- job_name: 'nomad_metrics'
 | 
				
			||||||
 | 
					  consul_sd_configs:
 | 
				
			||||||
 | 
					  - server: '{{ env "attr.unique.network.ip-address" }}:8500'
 | 
				
			||||||
 | 
					    services: ['nomad-client', 'nomad']
 | 
				
			||||||
 | 
					  relabel_configs:
 | 
				
			||||||
 | 
					  - source_labels: ['__meta_consul_tags']
 | 
				
			||||||
 | 
					    regex: '(.*)http(.*)'
 | 
				
			||||||
 | 
					    action: keep
 | 
				
			||||||
 | 
					  - source_labels: ['__meta_consul_node']
 | 
				
			||||||
 | 
					    target_label: 'node'
 | 
				
			||||||
 | 
					  # If nomad is available on multiple IPs, drop the ones which are not scrapable
 | 
				
			||||||
 | 
					  - source_labels: ['__address__']
 | 
				
			||||||
 | 
					    regex: '172(.*)'
 | 
				
			||||||
 | 
					    action: drop
 | 
				
			||||||
 | 
					  metrics_path: /v1/metrics
 | 
				
			||||||
 | 
					  params:
 | 
				
			||||||
 | 
					    format: ['prometheus']
 | 
				
			||||||
 | 
					- job_name: 'application_metrics'
 | 
				
			||||||
 | 
					  consul_sd_configs:
 | 
				
			||||||
 | 
					  - server: '{{ env "attr.unique.network.ip-address" }}:8500'
 | 
				
			||||||
 | 
					  relabel_configs:
 | 
				
			||||||
 | 
					  - source_labels: ['__meta_consul_service']
 | 
				
			||||||
 | 
					    regex: 'nomad|nomad-client|consul'
 | 
				
			||||||
 | 
					    action: drop
 | 
				
			||||||
 | 
					  # Drop services which do not want to be scraped.
 | 
				
			||||||
 | 
					  # Typically used when a job does not expose prometheus metrics.
 | 
				
			||||||
 | 
					  - source_labels: ['__meta_consul_tags']
 | 
				
			||||||
 | 
					    regex: '(.*)prometheus.io/scrape=false(.*)'
 | 
				
			||||||
 | 
					    action: 'drop'
 | 
				
			||||||
 | 
					  - source_labels: ['__meta_consul_node']
 | 
				
			||||||
 | 
					    target_label: 'node'
 | 
				
			||||||
 | 
					  - source_labels: ['__meta_consul_service']
 | 
				
			||||||
 | 
					    target_label: 'service'
 | 
				
			||||||
 | 
					                EOF
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
		Reference in a new issue