mirror of
https://github.com/Blockstream/satellite-api.git
synced 2025-02-21 21:31:37 +01:00
tf: update prometheus instance
This commit is contained in:
parent
b45faeb54a
commit
6b6448437e
8 changed files with 20 additions and 140 deletions
|
@ -7,7 +7,7 @@ variables:
|
||||||
API_IMAGE: us.gcr.io/satellite-api/satellite-api
|
API_IMAGE: us.gcr.io/satellite-api/satellite-api
|
||||||
SSE_IMAGE: us.gcr.io/satellite-api/satellite-api-sse
|
SSE_IMAGE: us.gcr.io/satellite-api/satellite-api-sse
|
||||||
|
|
||||||
image: blockstream/gcloud-docker:v0.14.5
|
image: blockstream/gcloud-docker:1.0.5
|
||||||
stages:
|
stages:
|
||||||
- test
|
- test
|
||||||
- build
|
- build
|
||||||
|
@ -363,7 +363,6 @@ plan_misc:
|
||||||
-var "onion_host=$ONION_HOST"
|
-var "onion_host=$ONION_HOST"
|
||||||
-var "prom_allowed_source_ip=$PROMETHEUS_ALLOWED_SOURCE_IP"
|
-var "prom_allowed_source_ip=$PROMETHEUS_ALLOWED_SOURCE_IP"
|
||||||
-var "prom_service_acct=$PROM_SA"
|
-var "prom_service_acct=$PROM_SA"
|
||||||
-var "opsgenie_key=$OPSGENIE_KEY"
|
|
||||||
-var "satellite_lb=$SATELLITE_LB"
|
-var "satellite_lb=$SATELLITE_LB"
|
||||||
-var "satellite_api_lb=$SATELLITE_API_LB"
|
-var "satellite_api_lb=$SATELLITE_API_LB"
|
||||||
-var "satellite_api_lb_staging=$SATELLITE_API_LB_STAGING"
|
-var "satellite_api_lb_staging=$SATELLITE_API_LB_STAGING"
|
||||||
|
@ -392,7 +391,6 @@ deploy_misc:
|
||||||
-var "onion_host=$ONION_HOST"
|
-var "onion_host=$ONION_HOST"
|
||||||
-var "prom_allowed_source_ip=$PROMETHEUS_ALLOWED_SOURCE_IP"
|
-var "prom_allowed_source_ip=$PROMETHEUS_ALLOWED_SOURCE_IP"
|
||||||
-var "prom_service_acct=$PROM_SA"
|
-var "prom_service_acct=$PROM_SA"
|
||||||
-var "opsgenie_key=$OPSGENIE_KEY"
|
|
||||||
-var "satellite_lb=$SATELLITE_LB"
|
-var "satellite_lb=$SATELLITE_LB"
|
||||||
-var "satellite_api_lb=$SATELLITE_API_LB"
|
-var "satellite_api_lb=$SATELLITE_API_LB"
|
||||||
-var "satellite_api_lb_staging=$SATELLITE_API_LB_STAGING"
|
-var "satellite_api_lb_staging=$SATELLITE_API_LB_STAGING"
|
||||||
|
|
|
@ -7,7 +7,7 @@ terraform {
|
||||||
source = "hashicorp/google-beta"
|
source = "hashicorp/google-beta"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
required_version = ">= 0.14"
|
required_version = ">= 0.15"
|
||||||
|
|
||||||
backend "gcs" {
|
backend "gcs" {
|
||||||
bucket = "terraform-bs-source"
|
bucket = "terraform-bs-source"
|
||||||
|
@ -169,7 +169,6 @@ module "prometheus" {
|
||||||
zone = var.zone
|
zone = var.zone
|
||||||
instance_type = var.instance_type[1]
|
instance_type = var.instance_type[1]
|
||||||
prom_allowed_source_ip = var.prom_allowed_source_ip
|
prom_allowed_source_ip = var.prom_allowed_source_ip
|
||||||
opsgenie_key = var.opsgenie_key
|
|
||||||
prom_service_acct = var.prom_service_acct
|
prom_service_acct = var.prom_service_acct
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,42 +9,6 @@ users:
|
||||||
uid: 2000
|
uid: 2000
|
||||||
|
|
||||||
write_files:
|
write_files:
|
||||||
- path: /home/bs/prometheus/alertmanager.yml
|
|
||||||
permissions: 0644
|
|
||||||
owner: root
|
|
||||||
content: |
|
|
||||||
route:
|
|
||||||
group_by: [cluster, alertname]
|
|
||||||
# If an alert isn't caught by a route, send it to the pager.
|
|
||||||
receiver: noc-pager
|
|
||||||
routes:
|
|
||||||
- match:
|
|
||||||
severity: page
|
|
||||||
receiver: noc-pager
|
|
||||||
|
|
||||||
receivers:
|
|
||||||
- name: noc-pager
|
|
||||||
opsgenie_configs:
|
|
||||||
- api_key: ${opsgenie_key}
|
|
||||||
teams: SecOps
|
|
||||||
tags: satellite-api
|
|
||||||
|
|
||||||
- path: /home/bs/prometheus/rules/alerts.yml
|
|
||||||
permissions: 0644
|
|
||||||
owner: root
|
|
||||||
content: |
|
|
||||||
groups:
|
|
||||||
- name: node
|
|
||||||
rules:
|
|
||||||
#- alert: NoHostsInNetwork
|
|
||||||
# expr: sum by (name) (up{name=~".+"}) == 0
|
|
||||||
# for: 1m
|
|
||||||
# labels:
|
|
||||||
# severity: page
|
|
||||||
# annotations:
|
|
||||||
# summary: No hosts in network {{ $labels.name }}, production traffic impacted!
|
|
||||||
# description: There are currently no hosts up in the network {{ $labels.name }}, verify the instance groups. https://wiki.blockstream.io/OpsPlaybooks/Esplora-Runbooks#NoHostsInNetwork
|
|
||||||
|
|
||||||
- path: /home/bs/prometheus/prometheus.yml
|
- path: /home/bs/prometheus/prometheus.yml
|
||||||
permissions: 0644
|
permissions: 0644
|
||||||
owner: root
|
owner: root
|
||||||
|
@ -55,16 +19,6 @@ write_files:
|
||||||
external_labels:
|
external_labels:
|
||||||
project: satellite-api
|
project: satellite-api
|
||||||
|
|
||||||
rule_files:
|
|
||||||
- /config/rules/alerts.yml
|
|
||||||
|
|
||||||
alerting:
|
|
||||||
alertmanagers:
|
|
||||||
- scheme: http
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- "127.0.0.1:9093"
|
|
||||||
|
|
||||||
scrape_configs:
|
scrape_configs:
|
||||||
- job_name: prometheus
|
- job_name: prometheus
|
||||||
relabel_configs:
|
relabel_configs:
|
||||||
|
@ -91,7 +45,7 @@ write_files:
|
||||||
zone: us-west1-c
|
zone: us-west1-c
|
||||||
port: 9100
|
port: 9100
|
||||||
|
|
||||||
- job_name: satellite-api
|
- job_name: satellite-api-node-exporter
|
||||||
relabel_configs:
|
relabel_configs:
|
||||||
- source_labels:
|
- source_labels:
|
||||||
- '__meta_gce_label_network'
|
- '__meta_gce_label_network'
|
||||||
|
@ -104,40 +58,12 @@ write_files:
|
||||||
target_label: 'instance_name'
|
target_label: 'instance_name'
|
||||||
gce_sd_configs:
|
gce_sd_configs:
|
||||||
- project: satellite-api
|
- project: satellite-api
|
||||||
filter: (labels.type = "lightning-app")
|
|
||||||
zone: us-west1-a
|
zone: us-west1-a
|
||||||
port: 9100
|
port: 9100
|
||||||
- project: satellite-api
|
- project: satellite-api
|
||||||
filter: (labels.type = "lightning-app")
|
|
||||||
zone: us-west1-b
|
zone: us-west1-b
|
||||||
port: 9100
|
port: 9100
|
||||||
- project: satellite-api
|
- project: satellite-api
|
||||||
filter: (labels.type = "lightning-app")
|
|
||||||
zone: us-west1-c
|
|
||||||
port: 9100
|
|
||||||
|
|
||||||
- job_name: satellite-api-tor
|
|
||||||
relabel_configs:
|
|
||||||
- source_labels:
|
|
||||||
- '__meta_gce_label_network'
|
|
||||||
target_label: 'network'
|
|
||||||
- source_labels:
|
|
||||||
- '__meta_gce_label_name'
|
|
||||||
target_label: 'name'
|
|
||||||
- source_labels:
|
|
||||||
- '__meta_gce_instance_name'
|
|
||||||
target_label: 'instance_name'
|
|
||||||
gce_sd_configs:
|
|
||||||
- project: satellite-api
|
|
||||||
filter: (labels.type = "tor")
|
|
||||||
zone: us-west1-a
|
|
||||||
port: 9100
|
|
||||||
- project: satellite-api
|
|
||||||
filter: (labels.type = "tor")
|
|
||||||
zone: us-west1-b
|
|
||||||
port: 9100
|
|
||||||
- project: satellite-api
|
|
||||||
filter: (labels.type = "tor")
|
|
||||||
zone: us-west1-c
|
zone: us-west1-c
|
||||||
port: 9100
|
port: 9100
|
||||||
|
|
||||||
|
@ -156,16 +82,15 @@ write_files:
|
||||||
- project: satellite-api
|
- project: satellite-api
|
||||||
filter: (labels.type = "lightning-app-blc")
|
filter: (labels.type = "lightning-app-blc")
|
||||||
zone: us-west1-a
|
zone: us-west1-a
|
||||||
port: 9900
|
port: 9750
|
||||||
- project: satellite-api
|
- project: satellite-api
|
||||||
filter: (labels.type = "lightning-app-blc")
|
filter: (labels.type = "lightning-app-blc")
|
||||||
zone: us-west1-b
|
zone: us-west1-b
|
||||||
port: 9900
|
port: 9750
|
||||||
- project: satellite-api
|
- project: satellite-api
|
||||||
filter: (labels.type = "lightning-app-blc")
|
filter: (labels.type = "lightning-app-blc")
|
||||||
zone: us-west1-c
|
zone: us-west1-c
|
||||||
port: 9900
|
port: 9750
|
||||||
|
|
||||||
|
|
||||||
- path: /etc/systemd/system/prometheus.service
|
- path: /etc/systemd/system/prometheus.service
|
||||||
permissions: 0644
|
permissions: 0644
|
||||||
|
@ -180,7 +105,6 @@ write_files:
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=1
|
RestartSec=1
|
||||||
Environment=HOME=/home/bs
|
Environment=HOME=/home/bs
|
||||||
ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
|
|
||||||
ExecStartPre=/usr/bin/docker pull ${prom_docker}
|
ExecStartPre=/usr/bin/docker pull ${prom_docker}
|
||||||
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 9090 -j ACCEPT
|
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 9090 -j ACCEPT
|
||||||
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 80 -j ACCEPT
|
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 80 -j ACCEPT
|
||||||
|
@ -193,7 +117,7 @@ write_files:
|
||||||
--name prometheus \
|
--name prometheus \
|
||||||
"${prom_docker}" --config.file=/config/prometheus.yml --web.enable-lifecycle --web.enable-admin-api --storage.tsdb.path=/data/metrics --storage.tsdb.retention=${retention}
|
"${prom_docker}" --config.file=/config/prometheus.yml --web.enable-lifecycle --web.enable-admin-api --storage.tsdb.path=/data/metrics --storage.tsdb.retention=${retention}
|
||||||
ExecStop=/usr/bin/docker stop prometheus
|
ExecStop=/usr/bin/docker stop prometheus
|
||||||
ExecStopPost=/usr/bin/docker rm prometheus
|
ExecStopPost=-/usr/bin/docker rm prometheus
|
||||||
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 9090 -j ACCEPT
|
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 9090 -j ACCEPT
|
||||||
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 80 -j ACCEPT
|
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 80 -j ACCEPT
|
||||||
ExecStopPost=/sbin/iptables -D PREROUTING -t nat -p tcp --dport 80 -j REDIRECT --to-port 9090
|
ExecStopPost=/sbin/iptables -D PREROUTING -t nat -p tcp --dport 80 -j REDIRECT --to-port 9090
|
||||||
|
@ -201,37 +125,6 @@ write_files:
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|
||||||
- path: /etc/systemd/system/alertmanager.service
|
|
||||||
permissions: 0644
|
|
||||||
owner: root
|
|
||||||
content: |
|
|
||||||
[Unit]
|
|
||||||
Description=alertmanager-server instance
|
|
||||||
Wants=gcr-online.target docker.service
|
|
||||||
After=gcr-online.service docker.service
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Restart=always
|
|
||||||
RestartSec=1
|
|
||||||
Environment=HOME=/home/bs
|
|
||||||
ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
|
|
||||||
ExecStartPre=/usr/bin/docker pull ${prom_docker}
|
|
||||||
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 9093 -j ACCEPT
|
|
||||||
ExecStart=/usr/bin/docker run \
|
|
||||||
--network=host \
|
|
||||||
-v /mnt/disks/data:/data:rw \
|
|
||||||
-v /home/bs/prometheus:/config:ro \
|
|
||||||
--read-only \
|
|
||||||
--name alertmanager \
|
|
||||||
--entrypoint=/bin/alertmanager \
|
|
||||||
"${prom_docker}" --config.file=/config/alertmanager.yml
|
|
||||||
ExecStop=/usr/bin/docker stop alertmanager
|
|
||||||
ExecStopPost=/usr/bin/docker rm alertmanager
|
|
||||||
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 9093 -j ACCEPT
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
|
|
||||||
- path: /etc/systemd/system/node-exporter.service
|
- path: /etc/systemd/system/node-exporter.service
|
||||||
permissions: 0644
|
permissions: 0644
|
||||||
owner: root
|
owner: root
|
||||||
|
@ -245,7 +138,6 @@ write_files:
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=1
|
RestartSec=1
|
||||||
Environment=HOME=/home/bs
|
Environment=HOME=/home/bs
|
||||||
ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
|
|
||||||
ExecStartPre=/usr/bin/docker pull ${node_exporter_docker}
|
ExecStartPre=/usr/bin/docker pull ${node_exporter_docker}
|
||||||
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 9100 -j ACCEPT
|
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 9100 -j ACCEPT
|
||||||
ExecStart=/usr/bin/docker run \
|
ExecStart=/usr/bin/docker run \
|
||||||
|
@ -256,9 +148,10 @@ write_files:
|
||||||
-v /sys:/host/sys:ro \
|
-v /sys:/host/sys:ro \
|
||||||
-v /:/rootfs:ro \
|
-v /:/rootfs:ro \
|
||||||
-v metrics:/metrics:ro \
|
-v metrics:/metrics:ro \
|
||||||
"${node_exporter_docker}" --path.procfs /host/proc --path.sysfs /host/sys --collector.textfile.directory /metrics --collector.filesystem.ignored-mount-points "^/(sys|proc|dev|host|etc($|/))"
|
-v /var/run/dbus/system_bus_socket:/var/run/dbus/system_bus_socket:ro \
|
||||||
|
"${node_exporter_docker}" --path.procfs /host/proc --path.sysfs /host/sys --collector.textfile.directory /metrics --collector.filesystem.ignored-mount-points "^/(sys|proc|dev|host|etc($|/))" --collector.systemd
|
||||||
ExecStop=/usr/bin/docker stop node-exporter
|
ExecStop=/usr/bin/docker stop node-exporter
|
||||||
ExecStopPost=/usr/bin/docker rm node-exporter
|
ExecStopPost=-/usr/bin/docker rm node-exporter
|
||||||
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 9100 -j ACCEPT
|
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 9100 -j ACCEPT
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
|
@ -268,9 +161,5 @@ runcmd:
|
||||||
- /bin/mkdir -p /mnt/disks/data/metrics
|
- /bin/mkdir -p /mnt/disks/data/metrics
|
||||||
- /bin/chown nobody:nobody /mnt/disks/data/metrics
|
- /bin/chown nobody:nobody /mnt/disks/data/metrics
|
||||||
- systemctl daemon-reload
|
- systemctl daemon-reload
|
||||||
- systemctl start prometheus.service
|
- systemctl enable --now prometheus.service
|
||||||
- systemctl enable prometheus.service
|
- systemctl enable --now node-exporter.service
|
||||||
- systemctl start alertmanager.service
|
|
||||||
- systemctl enable alertmanager.service
|
|
||||||
- systemctl start node-exporter.service
|
|
||||||
- systemctl enable node-exporter.service
|
|
||||||
|
|
|
@ -12,7 +12,6 @@ data "template_file" "prometheus" {
|
||||||
prom_docker = var.prom_docker
|
prom_docker = var.prom_docker
|
||||||
node_exporter_docker = var.node_exporter_docker
|
node_exporter_docker = var.node_exporter_docker
|
||||||
retention = var.retention
|
retention = var.retention
|
||||||
opsgenie_key = var.opsgenie_key
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ resource "google_compute_firewall" "all-traffic" {
|
||||||
ports = ["80"]
|
ports = ["80"]
|
||||||
}
|
}
|
||||||
|
|
||||||
source_ranges = ["${var.prom_allowed_source_ip}/32"]
|
source_ranges = var.prom_allowed_source_ip
|
||||||
|
|
||||||
target_service_accounts = [
|
target_service_accounts = [
|
||||||
google_service_account.prometheus[0].email,
|
google_service_account.prometheus[0].email,
|
||||||
|
|
|
@ -52,7 +52,7 @@ resource "google_compute_instance" "prometheus-server" {
|
||||||
|
|
||||||
boot_disk {
|
boot_disk {
|
||||||
initialize_params {
|
initialize_params {
|
||||||
size = "10"
|
size = "20"
|
||||||
image = var.boot_image
|
image = var.boot_image
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,11 +42,7 @@ variable "prom_service_acct" {
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "prom_allowed_source_ip" {
|
variable "prom_allowed_source_ip" {
|
||||||
type = string
|
type = list(any)
|
||||||
}
|
|
||||||
|
|
||||||
variable "opsgenie_key" {
|
|
||||||
type = string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "prom_docker" {
|
variable "prom_docker" {
|
||||||
|
@ -56,4 +52,3 @@ variable "prom_docker" {
|
||||||
variable "node_exporter_docker" {
|
variable "node_exporter_docker" {
|
||||||
type = string
|
type = string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -98,8 +98,8 @@ variable "lb_svc_acct" {
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "prom_allowed_source_ip" {
|
variable "prom_allowed_source_ip" {
|
||||||
type = string
|
type = list(any)
|
||||||
default = ""
|
default = []
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "opsgenie_key" {
|
variable "opsgenie_key" {
|
||||||
|
@ -227,17 +227,17 @@ variable "charge_docker" {
|
||||||
|
|
||||||
variable "tor_docker" {
|
variable "tor_docker" {
|
||||||
type = string
|
type = string
|
||||||
default = "blockstream/tor@sha256:46594b0a84f7503de70078652e7bd94f6152b7976d11779ad9f143f02508284c"
|
default = "blockstream/tor:0.4.3.7"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "node_exporter_docker" {
|
variable "node_exporter_docker" {
|
||||||
type = string
|
type = string
|
||||||
default = "prom/node-exporter@sha256:55302581333c43d540db0e144cf9e7735423117a733cdec27716d87254221086"
|
default = "prom/node-exporter:v1.1.2"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "prom_docker" {
|
variable "prom_docker" {
|
||||||
type = string
|
type = string
|
||||||
default = "blockstream/prometheus@sha256:cab8c2359ab187aa6c9e9c7fcfcc3060b62742417030a77862c747e091d3c6d6"
|
default = "prom/prometheus:v2.29.1"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "gcloud_docker" {
|
variable "gcloud_docker" {
|
||||||
|
|
Loading…
Add table
Reference in a new issue