Compare commits

..

6 commits

Author SHA1 Message Date
wizzdom
6a3f7018e0
grafana: add dynamic prometheus datasource 2025-02-28 02:40:16 +00:00
Gavin Holahan
10fbcd5268
service 2025-02-28 02:12:19 +00:00
wizzdom
9cc57a2ed8
traefik: use consul, bump res, add metrics, fix dummy service 2025-02-28 01:25:49 +00:00
Gavin Holahan
814655cc5f
Removed Static, added better prom .yml 2025-02-28 01:25:49 +00:00
wizzdom
0d6200742a
monitoring: add prometheus 2025-02-28 01:25:49 +00:00
wizzdom
13a0675bab
monitoring: add grafana 2025-02-28 01:25:49 +00:00
10 changed files with 202 additions and 145 deletions

View file

@ -1,64 +0,0 @@
# Single-instance Minecraft server built on the itzg/minecraft-server image.
# The game port and the RCON port are registered as two separate services.
job "esports-minecraft" {
  type        = "service"
  datacenters = ["aperture"]

  group "esports-mc" {
    count = 1

    network {
      # `to` is the port the server listens on inside the container.
      port "mc" { to = 25565 }
      port "rcon" { to = 25575 }
    }

    # Game traffic.
    service {
      port = "mc"
      name = "esports-mc"
    }

    # Remote console.
    service {
      port = "rcon"
      name = "esports-mc-rcon"
    }

    task "esports-minecraft" {
      driver = "docker"

      resources {
        memory = 20480 # 20 GB
        cpu    = 5000  # 5000 MHz
      }

      config {
        image = "itzg/minecraft-server"
        ports = ["mc", "rcon"]

        # World data lives on the host under a directory named after the task.
        volumes = ["/storage/nomad/${NOMAD_TASK_NAME}:/data"]
      }

      # Rendered to local/.env and loaded into the task environment.
      # MAX_MEMORY (18G) is kept below the 20 GB task memory limit above.
      template {
        env         = true
        destination = "local/.env"
        data        = <<EOF
EULA = "TRUE"
TYPE = "PAPER"
VERSION = "1.21.4"
ICON = "https://liquipedia.net/commons/images/thumb/5/53/DCU_Esports_allmode.png/37px-DCU_Esports_allmode.png"
USE_AIKAR_FLAGS = true
MAX_MEMORY = 18G
MOTD = "Powered by Redbrick"
MAX_PLAYERS = "32"
VIEW_DISTANCE = "32"
ENABLE_RCON = true
RCON_PASSWORD = {{ key "games/mc/esports-mc/rcon/password" }}
# Auto-download plugins
SPIGET_RESOURCES=83581,62325,118271,28140,102931 # RHLeafDecay, GSit, GravesX, Luckperms, NoChatReport
MODRINTH_PROJECTS=datapack:no-enderman-grief,thizzyz-tree-feller,imageframe,bmarker,datapack:players-drop-heads,viaversion,viabackwards
EOF
      }
    }
  }
}

View file

@ -51,7 +51,7 @@ job "traefik" {
service {
name = "traefik-http"
provider = "nomad"
provider = "consul"
port = "https"
}
@ -67,6 +67,10 @@ job "traefik" {
"/storage/nomad/traefik/access.log:/access.log",
]
}
resources {
cpu = 500
memory = 1024
}
template {
data = <<EOF
@ -157,7 +161,11 @@ job "traefik" {
storage = "acme.json"
[certificatesResolvers.lets-encrypt.acme.tlsChallenge]
[tracing]
[metrics]
[metrics.prometheus]
addServicesLabels = true
addRoutersLabels = true
addEntryPointsLabels = true
[accessLog]
filePath = "/access.log"
@ -195,7 +203,7 @@ EOF
[http.services]
[http.services.dummy-service.loadBalancer]
[[http.services.dummy-service.loadBalancer.servers]]
url = "http://127.0.0.1" # Dummy service - not used
url = "http://0.0.0.0" # Dummy service - not used
EOF
destination = "local/dynamic.toml"
change_mode = "noop"

110
jobs/monitoring/grafana.hcl Normal file
View file

@ -0,0 +1,110 @@
# Grafana with a dedicated PostgreSQL backend running in the same group.
#
# - "grafana" task: serves the UI on container port 3000 and is routed by
#   Traefik at grafana.redbrick.dcu.ie. Its Prometheus datasource is
#   provisioned from a template that looks up the "prometheus" service.
# - "db" task: PostgreSQL holding Grafana's own state.
#
# Database name, user and password are read from the grafana/db/* keys.
job "grafana" {
  datacenters = ["aperture"]
  type        = "service"

  group "monitoring" {
    network {
      port "http" {
        to = 3000
      }

      port "db" {
        to = 5432
      }
    }

    service {
      name = "grafana"
      port = "http"

      check {
        type     = "http"
        path     = "/"
        interval = "10s"
        timeout  = "2s"
      }

      tags = [
        "traefik.enable=true",
        "traefik.http.routers.grafana.entrypoints=web,websecure",
        "traefik.http.routers.grafana.rule=Host(`grafana.redbrick.dcu.ie`)",
        "traefik.http.routers.grafana.tls=true",
        "traefik.http.routers.grafana.tls.certresolver=lets-encrypt",
      ]
    }

    task "grafana" {
      driver = "docker"
      user   = "1001:1001"

      env {
        GF_AUTH_BASIC_ENABLED = "true"
        GF_INSTALL_PLUGINS    = "grafana-piechart-panel"
        GF_SERVER_ROOT_URL    = "https://grafana.redbrick.dcu.ie"
      }

      config {
        image = "grafana/grafana"
        ports = ["http"]

        volumes = [
          "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/grafana",
          # Rendered by the datasource template below.
          "local/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml"
        ]
      }

      # Database connection settings, loaded into the task environment.
      # NOMAD_ADDR_db is the address of the "db" port declared in this group.
      template {
        data        = <<EOH
GF_DATABASE_TYPE=postgres
GF_DATABASE_HOST={{ env "NOMAD_ADDR_db" }}
GF_DATABASE_NAME={{ key "grafana/db/name" }}
GF_DATABASE_USER={{ key "grafana/db/user" }}
GF_DATABASE_PASSWORD={{ key "grafana/db/password" }}
GF_FEATURE_TOGGLES_ENABLE=publicDashboards
GF_LOG_LEVEL=debug
EOH
        destination = "local/.env"
        env         = true
      }

      # Datasource provisioning file. Only the port is taken from the service
      # lookup; the host is always the prometheus.service.consul DNS name.
      # The `eq $i 0` guard emits `url` for the first instance only, so more
      # than one registered Prometheus instance cannot produce duplicate
      # `url` keys in the rendered YAML.
      template {
        data        = <<EOH
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    {{- range $i, $svc := service "prometheus" }}{{ if eq $i 0 }}
    url: http://prometheus.service.consul:{{ $svc.Port }}{{ end }}{{ end }}
    isDefault: true
    editable: false
EOH
        destination = "local/datasources.yml"
      }
    }

    task "db" {
      driver = "docker"

      config {
        image = "postgres:17-alpine"
        ports = ["db"]

        volumes = [
          "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/postgresql/data",
        ]
      }

      # The postgres image names its initial database from POSTGRES_DB; it
      # does not read POSTGRES_NAME. With the wrong variable the database
      # that Grafana is told to use (grafana/db/name) would not be created.
      template {
        data        = <<EOH
POSTGRES_PASSWORD={{ key "grafana/db/password" }}
POSTGRES_USER={{ key "grafana/db/user" }}
POSTGRES_DB={{ key "grafana/db/name" }}
EOH
        destination = "local/db.env"
        env         = true
      }
    }
  }
}

View file

@ -0,0 +1,74 @@
# Prometheus server. Its scrape configuration is rendered from the template
# below and mounted over the image's default /etc/prometheus/prometheus.yml.
#
# Scrape jobs (targets discovered through Consul at consul.service.consul:8500):
# - nomad_metrics:     the "nomad" and "nomad-client" services tagged "http",
#                      scraped at /v1/metrics?format=prometheus.
# - container-metrics: any service tagged "prometheus.enable=true". A
#                      "prometheus.path=<path>" tag overrides the scrape path.
job "prometheus" {
  datacenters = ["aperture"]
  type        = "service"

  group "prometheus" {
    count = 1

    network {
      port "http" {
        to = 9090
      }
    }

    task "prometheus" {
      driver = "docker"

      # Grafana's datasource template looks this service up by name.
      service {
        name = "prometheus"
        port = "http"
      }

      config {
        # NOTE(review): ":latest" is unpinned - consider a fixed version tag.
        image = "prom/prometheus:latest"
        ports = ["http"]

        volumes = [
          "local/prometheus.yml:/etc/prometheus/prometheus.yml"
        ]
      }

      # In container-metrics:
      # - The first relabel rule copies the Consul service name into `job`.
      #   It previously set a constant replacement, which discarded the
      #   source label and gave every target the same job name.
      # - The default path is the conventional /metrics rather than Nomad's
      #   /v1/metrics; services can still override it with the path tag.
      template {
        destination = "local/prometheus.yml"
        data        = <<EOF
global:
  scrape_interval: 10s
  evaluation_interval: 10s

scrape_configs:
  - job_name: 'nomad_metrics'
    consul_sd_configs:
      - server: 'consul.service.consul:8500'
        services: ['nomad-client', 'nomad'] # This allows for Client (Workload) and Server (Orchestration) metrics
        tags: ['http']
    metrics_path: /v1/metrics
    params:
      format: ['prometheus']

  - job_name: 'container-metrics'
    consul_sd_configs:
      - server: 'consul.service.consul:8500'
        tags: ['prometheus.enable=true']
    relabel_configs:
      - source_labels: ['__meta_consul_service']
        target_label: 'job'
      - source_labels: ['__meta_consul_tags']
        regex: '.*prometheus.path=([^,]+).*' # Extract path from tag
        target_label: '__metrics_path__'
    metrics_path: /metrics
    params:
      format: ['prometheus']
EOF
      }

      resources {
        cpu    = 500
        memory = 512
      }
    }
  }
}

View file

@ -1,44 +0,0 @@
# Uptime Kuma status page, routed by Traefik at status.redbrick.dcu.ie.
job "uptime-kuma" {
  type        = "service"
  datacenters = ["aperture"]

  group "web" {
    count = 1

    network {
      # `to` is the port the application listens on inside the container.
      port "http" { to = 3001 }
    }

    service {
      port = "http"

      # Traefik router definition: hostname rule, entrypoints and the
      # certificate resolver used for TLS.
      tags = [
        "traefik.enable=true",
        "traefik.http.routers.uptime-kuma.rule=Host(`status.redbrick.dcu.ie`)",
        "traefik.http.routers.uptime-kuma.entrypoints=web,websecure",
        "traefik.http.routers.uptime-kuma.tls.certresolver=lets-encrypt",
      ]

      # HTTP health check against the root path.
      check {
        type     = "http"
        path     = "/"
        timeout  = "2s"
        interval = "10s"
      }
    }

    task "web" {
      driver = "docker"

      config {
        image = "louislam/uptime-kuma:1"
        ports = ["http"]

        # Application data is kept on the host.
        volumes = ["/storage/nomad/uptime-kuma/data:/app/data"]
      }
    }
  }
}

View file

@ -41,7 +41,7 @@ $wgDBpassword = "{{ key "mediawiki/db/password" }}";
# MySQL specific settings
$wgDBprefix = "rbwiki_";
# MySQL table options to use during installation or update
$wgDBTableOptions = "ENGINE=InnoDB, DEFAULT CHARSET=utf8mb4";
$wgDBTableOptions = "ENGINE=InnoDB, DEFAULT CHARSET=binary";
## Shared memory settings
$wgMainCacheType = CACHE_NONE;
@ -89,15 +89,11 @@ wfLoadSkin( 'Vector' );
wfLoadSkin( 'Citizen' );
wfLoadSkin( 'Timeless' );
wfLoadSkin( 'MinervaNeue' );
wfLoadSkin( 'Medik' );
$wgCitizenThemeColor = "#a81e22";
$wgCitizenShowPageTools = "permission";
$wgCitizenSearchDescriptionSource = "pagedescription";
$wgMedikColor = "#a81e22";
$wgMedikShowLogo = "main";
$wgLocalisationUpdateDirectory = "$IP/cache";
# load extensions

View file

@ -30,7 +30,7 @@ job_name=$(echo ${NOMAD_JOB_NAME} | cut -d "/" -f 1)
nomad alloc exec -task rbwiki-db $alloc_id mariadb-dump -u {{ key "mediawiki/db/username" }} -p'{{ key "mediawiki/db/password"}}' {{ key "mediawiki/db/name" }} > "${file}"
find /storage/backups/nomad/wiki/mysql/rbwiki-mysql* -ctime +30 -exec rm {} \; || true
find /storage/backups/nomad/wiki/mysql/rbwiki-mysql* -ctime +3 -exec rm {} \; || true
if [ -s "$file" ]; then # check if file exists and is not empty
echo "Backup successful"

View file

@ -213,18 +213,6 @@ EOH
template {
data = <<EOH
[mysqld]
# Ensure full UTF-8 support
character-set-server = utf8mb4
collation-server = utf8mb4_unicode_ci
skip-character-set-client-handshake
# Fix 1000-byte key length issue
innodb_large_prefix = 1
innodb_file_format = Barracuda
innodb_file_per_table = 1
innodb_default_row_format = dynamic
# Performance optimizations (Keep these based on your system)
max_connections = 100
key_buffer_size = 2G
query_cache_size = 0
@ -236,14 +224,13 @@ innodb_io_capacity = 200
tmp_table_size = 5242K
max_heap_table_size = 5242K
innodb_log_buffer_size = 16M
innodb_file_per_table = 1
bind-address = 0.0.0.0
# Logging
slow_query_log = 1
slow_query_log_file = /var/log/mysql/slow.log
long_query_time = 1
# Network
bind-address = 0.0.0.0
EOH
destination = "local/conf.cnf"

View file

@ -10,7 +10,6 @@ job "esports-discord-bot" {
config {
image = "ghcr.io/aydenjahola/discord-multipurpose-bot:main"
force_pull = true
}
resources {
@ -29,15 +28,6 @@ RAPIDAPI_KEY={{ key "socs/esports/bot/rapidapi/key" }}
TRACKER_API_KEY={{ key "socs/esports/bot/trackerapi/key" }}
TRACKER_API_URL={{ key "socs/esports/bot/trackerapi/url" }}
WORDNIK_API_KEY={{key "socs/esports/bot/wordnikapi/key" }}
HUGGING_FACE_API_KEY={{ key "socs/esports/bot/huggingface/key" }}
RCON_HOST=esports-mc-rcon.service.consul
# https://discuss.hashicorp.com/t/passing-registered-ip-and-port-from-consul-to-env-nomad-job-section/35647
{{ range service "esports-mc-rcon" }}
RCON_PORT={{ .Port }}{{ end }}
RCON_PASSWORD={{ key "games/mc/esports-mc/rcon/password" }}
EOH
destination = "local/.env"
env = true

View file

@ -38,7 +38,7 @@ alloc_id=$(nomad job status -verbose bastion-vm | grep running | tail -n 1 | cut
job_name=$(echo ${NOMAD_JOB_NAME} | cut -d "/" -f 1)
echo "Backing up alloc id: ${alloc_id} on: ${host} to ${path}/${file}..."
ssh -i {{ key "bastion-vm/service/key" }} {{ key "bastion-vm/service/user" }}@${host} "sudo cat /opt/nomad/alloc/${alloc_id}/bastion-vm/local/bastion-vm.qcow2" > ${path}/${file}
scp -B -i {{ key "bastion-vm/service/key" }} {{ key "bastion-vm/service/user" }}@${host}:/opt/nomad/alloc/${alloc_id}/bastion-vm/local/bastion-vm.qcow2 ${path}/${file}
find ${path}/bastion-vm-* -ctime +2 -exec rm {} \; || true