Lots of changes: add a Prometheus instance and adjust the api.blockstream.space configuration

This commit is contained in:
nitram 2019-02-26 16:44:51 -08:00
parent 8badc6a9ea
commit fe13af400e
No known key found for this signature in database
GPG key ID: 2352C35346C5D534
16 changed files with 292 additions and 238 deletions

View file

@ -85,8 +85,9 @@ plan_misc:
&& terraform plan
-var "region=$REGION"
-var "zone=$ZONE"
-var "tor_instance_type=$TOR_INSTANCE_TYPE"
-var "instance_type=$INSTANCE_TYPE"
-var "onion_host=$ONION_HOST"
-var "prom_allowed_source_ip=$PROMETHEUS_ALLOWED_SOURCE_IP"
-input=false)
# Tag with staging_v.* to deploy staging (e.g. staging_v0.1.1)
@ -151,8 +152,9 @@ deploy_misc:
&& terraform apply
-var "region=$REGION"
-var "zone=$ZONE"
-var "tor_instance_type=$TOR_INSTANCE_TYPE"
-var "instance_type=$INSTANCE_TYPE"
-var "onion_host=$ONION_HOST"
-var "prom_allowed_source_ip=$PROMETHEUS_ALLOWED_SOURCE_IP"
-input=false -auto-approve)
# Pushing to this branch destroys the staging infrastructure

View file

@ -1,19 +1,3 @@
data "terraform_remote_state" "lightning-store-prod" {
backend = "gcs"
config {
bucket = "tf-state-lightning-store"
prefix = "terraform/state"
project = "blockstream-store"
}
workspace = "production"
defaults {
prometheus_service_account = "${var.prom_service_acct}"
}
}
data "terraform_remote_state" "blc-prod" {
backend = "gcs"
@ -24,4 +8,8 @@ data "terraform_remote_state" "blc-prod" {
}
workspace = "prod"
defaults {
prometheus_service_account = "${var.prom_service_acct}"
}
}

View file

@ -36,11 +36,11 @@ module "blc" {
# CI vars
region = "${var.region}"
zone = "${var.zone}"
instance_type = "${var.instance_type}"
instance_type = "${var.instance_type[0]}"
host = "${var.host}"
ssl_cert = "${var.ssl_cert}"
ssl_cert = ["${var.ssl_cert}"]
timeout = "${var.timeout}"
prom_service_acct = "${data.terraform_remote_state.lightning-store-prod.prometheus_service_account}"
prom_service_acct = "${data.terraform_remote_state.blc-prod.prometheus_service_account}"
opsgenie_key = "${var.opsgenie_key}"
rpcuser = "${var.rpcuser}"
rpcpass = "${var.rpcpass}"
@ -65,7 +65,27 @@ module "tor" {
#CI vars
region = "${var.region}"
zone = "${var.zone}"
tor_instance_type = "${var.tor_instance_type}"
instance_type = "${var.instance_type[1]}"
onion_host = "${var.onion_host}"
prom_service_acct = "${data.terraform_remote_state.lightning-store-prod.prometheus_service_account}"
prom_service_acct = "${data.terraform_remote_state.blc-prod.prometheus_service_account}"
}
# Prometheus monitoring module. Its resources are only created when
# local.create_misc is non-zero.
module "prometheus" {
  source = "modules/prometheus"

  name             = "satapi-prometheus"
  project          = "${var.project}"
  network          = "default"
  create_resources = "${local.create_misc}"

  # Container images
  prom_docker          = "${var.prom_docker}"
  node_exporter_docker = "${var.node_exporter_docker}"

  # CI vars
  region                 = "${var.region}"
  zone                   = "${var.zone}"
  prom_allowed_source_ip = "${var.prom_allowed_source_ip}"
  opsgenie_key           = "${var.opsgenie_key}"

  # instance_type is a list: index 0 goes to the blc module, index 1 to tor,
  # and index 2 sizes the Prometheus instance.
  instance_type = "${var.instance_type[2]}"

  # In the "misc" workspace no account is passed in (empty string); every
  # other workspace passes the account read from the blc-prod remote state.
  prom_service_acct = "${terraform.workspace == "misc" ? "" : data.terraform_remote_state.blc-prod.prometheus_service_account}"
}

View file

@ -49,11 +49,11 @@ variable "net" {
}
variable "ssl_cert" {
type = "string"
type = "list"
}
variable "host" {
type = "string"
type = "list"
}
variable "timeout" {

View file

@ -2,14 +2,14 @@ bootcmd:
- blkid /dev/disk/by-id/google-data || mkfs.ext4 -L data -E lazy_itable_init=0,lazy_journal_init=0,discard /dev/disk/by-id/google-data
mounts:
- [ /dev/disk/by-label/data, /mnt/disks/data, auto, defaults ]
# bootcmd creates the filesystem with the label "data" (mkfs.ext4 -L data), so
# no /dev/disk/by-label/google-data link ever exists. Reference the disk by the
# same by-id path that bootcmd uses so the data volume is actually mounted.
- [ /dev/disk/by-id/google-data, /mnt/disks/data, auto, defaults ]
users:
- name: exec
- name: bs
uid: 2000
write_files:
- path: /home/exec/prometheus/alertmanager.yml
- path: /home/bs/prometheus/alertmanager.yml
permissions: 0644
owner: root
content: |
@ -26,20 +26,13 @@ write_files:
receiver: noc-pager
receivers:
- name: noc-email
email_configs:
- to: noc@blockstream.io
from: noreply@blockstream.io
smarthost: smtp.gmail.com:587
auth_username: ${email_username}
auth_password: ${email_password}
- name: noc-pager
opsgenie_configs:
- api_key: ${opsgenie_api_key}
- api_key: ${opsgenie_key}
teams: SecOps
tags: lightning-store
tags: satellite-api
- path: /home/exec/prometheus/rules/alerts.yml
- path: /home/bs/prometheus/rules/alerts.yml
permissions: 0644
owner: root
content: |
@ -55,7 +48,7 @@ write_files:
# summary: No hosts in network {{ $labels.name }}, production traffic impacted!
# description: There are currently no hosts up in the network {{ $labels.name }}, verify the instance groups. https://wiki.blockstream.io/OpsPlaybooks/Esplora-Runbooks#NoHostsInNetwork
- path: /home/exec/prometheus/prometheus.yml
- path: /home/bs/prometheus/prometheus.yml
permissions: 0644
owner: root
content: |
@ -63,7 +56,7 @@ write_files:
evaluation_interval: 15s
scrape_interval: 15s
external_labels:
project: blockstream-store
project: satellite-api
rule_files:
- /config/rules/alerts.yml
@ -88,122 +81,19 @@ write_files:
- '__meta_gce_instance_name'
target_label: 'instance_name'
gce_sd_configs:
- project: blockstream-store
zone: asia-northeast1-a
- project: satellite-api
filter: (labels.type = "prometheus")
zone: us-west1-a
port: 9100
- project: blockstream-store
zone: asia-northeast1-b
- project: satellite-api
filter: (labels.type = "prometheus")
zone: us-west1-b
port: 9100
- project: blockstream-store
zone: asia-northeast1-c
- project: satellite-api
filter: (labels.type = "prometheus")
zone: us-west1-c
port: 9100
- project: blockstream-store
zone: europe-west4-a
filter: (labels.type = "prometheus")
port: 9100
- project: blockstream-store
zone: europe-west4-b
filter: (labels.type = "prometheus")
port: 9100
- project: blockstream-store
zone: europe-west4-c
filter: (labels.type = "prometheus")
port: 9100
- project: blockstream-store
zone: us-central1-a
filter: (labels.type = "prometheus")
port: 9100
- project: blockstream-store
zone: us-central1-b
filter: (labels.type = "prometheus")
port: 9100
- project: blockstream-store
zone: us-central1-c
filter: (labels.type = "prometheus")
port: 9100
- project: blockstream-store
zone: us-central1-f
filter: (labels.type = "prometheus")
port: 9100
- project: blockstream-store
zone: us-east1-d
filter: (labels.type = "prometheus")
port: 9100
- project: blockstream-store
zone: us-east1-b
filter: (labels.type = "prometheus")
port: 9100
- project: blockstream-store
zone: us-east1-c
filter: (labels.type = "prometheus")
port: 9100
- job_name: wordpress
relabel_configs:
- source_labels:
- '__meta_gce_label_network'
target_label: 'network'
- source_labels:
- '__meta_gce_label_name'
target_label: 'name'
- source_labels:
- '__meta_gce_instance_name'
target_label: 'instance_name'
gce_sd_configs:
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: asia-northeast1-a
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: asia-northeast1-b
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: asia-northeast1-c
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: europe-west4-a
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: europe-west4-b
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: europe-west4-c
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: us-central1-a
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: us-central1-b
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: us-central1-c
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: us-central1-f
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: us-east1-d
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: us-east1-b
port: 9100
- project: blockstream-store
filter: (labels.type = "wordpress")
zone: us-east1-c
port: 9100
- job_name: satellite-api
relabel_configs:
- source_labels:
@ -216,71 +106,20 @@ write_files:
- '__meta_gce_instance_name'
target_label: 'instance_name'
gce_sd_configs:
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: asia-northeast1-a
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: asia-northeast1-b
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: asia-northeast1-c
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: europe-west4-a
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: europe-west4-b
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: europe-west4-c
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: us-central1-a
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: us-central1-b
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: us-central1-c
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: us-central1-f
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: us-east1-d
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: us-east1-b
port: 9100
- project: blockstream-store
filter: (labels.type = "lightning-app")
zone: us-east1-c
port: 9100
- project: blockstream-store
- project: satellite-api
filter: (labels.type = "lightning-app")
zone: us-west1-a
port: 9100
- project: blockstream-store
- project: satellite-api
filter: (labels.type = "lightning-app")
zone: us-west1-b
port: 9100
- project: blockstream-store
- project: satellite-api
filter: (labels.type = "lightning-app")
zone: us-west1-c
port: 9100
- job_name: btcpayserver
- job_name: satellite-api-tor
relabel_configs:
- source_labels:
- '__meta_gce_label_network'
@ -292,19 +131,20 @@ write_files:
- '__meta_gce_instance_name'
target_label: 'instance_name'
gce_sd_configs:
- project: blockstream-store
filter: (labels.type = "btcpayserver")
- project: satellite-api
filter: (labels.type = "tor")
zone: us-west1-a
port: 9100
- project: blockstream-store
filter: (labels.type = "btcpayserver")
- project: satellite-api
filter: (labels.type = "tor")
zone: us-west1-b
port: 9100
- project: blockstream-store
filter: (labels.type = "btcpayserver")
- project: satellite-api
filter: (labels.type = "tor")
zone: us-west1-c
port: 9100
- path: /etc/systemd/system/prometheus.service
permissions: 0644
owner: root
@ -317,19 +157,19 @@ write_files:
[Service]
Restart=always
RestartSec=1
Environment=HOME=/home/exec
Environment=HOME=/home/bs
ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
ExecStartPre=/usr/bin/docker pull ${docker_tag}
ExecStartPre=/usr/bin/docker pull ${prom_docker}
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 9090 -j ACCEPT
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 80 -j ACCEPT
ExecStartPre=/sbin/iptables -A PREROUTING -t nat -p tcp --dport 80 -j REDIRECT --to-port 9090
ExecStart=/usr/bin/docker run \
--network=host \
-v /mnt/disks/data:/data:rw \
-v /home/exec/prometheus:/config:ro \
-v /home/bs/prometheus:/config:ro \
--read-only \
--name prometheus \
"${docker_tag}" --config.file=/config/prometheus.yml --web.enable-lifecycle --web.enable-admin-api --storage.tsdb.path=/data/metrics --storage.tsdb.retention=${retention}
"${prom_docker}" --config.file=/config/prometheus.yml --web.enable-lifecycle --web.enable-admin-api --storage.tsdb.path=/data/metrics --storage.tsdb.retention=${retention}
ExecStop=/usr/bin/docker stop prometheus
ExecStopPost=/usr/bin/docker rm prometheus
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 9090 -j ACCEPT
@ -338,6 +178,7 @@ write_files:
[Install]
WantedBy=multi-user.target
- path: /etc/systemd/system/alertmanager.service
permissions: 0644
owner: root
@ -350,24 +191,25 @@ write_files:
[Service]
Restart=always
RestartSec=1
Environment=HOME=/home/exec
Environment=HOME=/home/bs
ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
ExecStartPre=/usr/bin/docker pull ${docker_tag}
ExecStartPre=/usr/bin/docker pull ${prom_docker}
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 9093 -j ACCEPT
ExecStart=/usr/bin/docker run \
--network=host \
-v /mnt/disks/data:/data:rw \
-v /home/exec/prometheus:/config:ro \
-v /home/bs/prometheus:/config:ro \
--read-only \
--name alertmanager \
--entrypoint=/bin/alertmanager \
"${docker_tag}" --config.file=/config/alertmanager.yml
"${prom_docker}" --config.file=/config/alertmanager.yml
ExecStop=/usr/bin/docker stop alertmanager
ExecStopPost=/usr/bin/docker rm alertmanager
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 9093 -j ACCEPT
[Install]
WantedBy=multi-user.target
- path: /etc/systemd/system/node-exporter.service
permissions: 0644
owner: root
@ -380,9 +222,9 @@ write_files:
[Service]
Restart=always
RestartSec=1
Environment=HOME=/home/exec
Environment=HOME=/home/bs
ExecStartPre=/usr/bin/docker-credential-gcr configure-docker
ExecStartPre=/usr/bin/docker pull ${docker_tag_node_exporter}
ExecStartPre=/usr/bin/docker pull ${node_exporter_docker}
ExecStartPre=/sbin/iptables -A INPUT -m tcp -p tcp --dport 9100 -j ACCEPT
ExecStart=/usr/bin/docker run \
--name=node-exporter \
@ -392,7 +234,7 @@ write_files:
-v /sys:/host/sys:ro \
-v /:/rootfs:ro \
-v metrics:/metrics:ro \
"${docker_tag_node_exporter}" --path.procfs /host/proc --path.sysfs /host/sys --collector.textfile.directory /metrics --collector.filesystem.ignored-mount-points "^/(sys|proc|dev|host|etc($|/))"
"${node_exporter_docker}" --path.procfs /host/proc --path.sysfs /host/sys --collector.textfile.directory /metrics --collector.filesystem.ignored-mount-points "^/(sys|proc|dev|host|etc($|/))"
ExecStop=/usr/bin/docker stop node-exporter
ExecStopPost=/usr/bin/docker rm node-exporter
ExecStopPost=/sbin/iptables -D INPUT -m tcp -p tcp --dport 9100 -j ACCEPT

View file

@ -0,0 +1,24 @@
# Look up the pre-existing "default" VPC network.
data "google_compute_network" "default" {
  name = "default"
}

# Render the cloud-init template shipped with this module, substituting the
# container images, the retention setting and the OpsGenie key.
data "template_file" "prometheus" {
  template = "${file("${path.module}/cloud-init/prometheus.yml")}"

  vars {
    opsgenie_key         = "${var.opsgenie_key}"
    retention            = "${var.retention}"
    prom_docker          = "${var.prom_docker}"
    node_exporter_docker = "${var.node_exporter_docker}"
  }
}

# Wrap the rendered template as a single text/cloud-config part, emitted as
# plain text (neither gzipped nor base64-encoded).
data "template_cloudinit_config" "prometheus" {
  base64_encode = false
  gzip          = false

  part {
    content_type = "text/cloud-config"
    content      = "${data.template_file.prometheus.rendered}"
  }
}

View file

@ -0,0 +1,17 @@
# Firewall rule for instances running as this module's Prometheus service
# account. Despite the "all-traffic" name it only opens TCP port 80, and only
# to the single address given in prom_allowed_source_ip.
resource "google_compute_firewall" "all-traffic" {
  name    = "prometheus-${var.name}-all-traffic-access"
  network = "${data.google_compute_network.default.self_link}"
  count   = "${var.create_resources}"

  allow {
    protocol = "tcp"
    ports    = ["80"]
  }

  # prom_allowed_source_ip is a bare address; the /32 suffix limits the rule
  # to exactly that host.
  source_ranges = ["${var.prom_allowed_source_ip}/32"]

  # google_service_account.prometheus is created with count, so pick the
  # matching instance explicitly (splat + element) instead of relying on an
  # un-indexed reference, as the other resources in this module do.
  target_service_accounts = [
    "${element(google_service_account.prometheus.*.email, count.index)}",
  ]
}

View file

@ -0,0 +1,14 @@
# Service account for Prometheus; created only when create_resources is
# non-zero.
resource "google_service_account" "prometheus" {
  account_id   = "${var.name}"
  display_name = "${var.name}"
  count        = "${var.create_resources}"
}

# Project-level role binding for the service account above.
resource "google_project_iam_member" "prometheus" {
  project = "${var.project}"

  # NOTE(review): roles/editor grants broad write access to the project.
  # Confirm whether a narrower role would be sufficient for this account.
  role = "roles/editor"

  # The service account is created with count, so select the matching
  # instance explicitly (splat + element) rather than using an un-indexed
  # reference to a counted resource.
  member = "serviceAccount:${element(google_service_account.prometheus.*.email, count.index)}"
  count  = "${var.create_resources}"
}

View file

@ -0,0 +1,77 @@
# 50 GB standard persistent disk that is attached to the Prometheus instance
# as its "data" device.
resource "google_compute_disk" "prometheus-data" {
  count = "${var.create_resources}"

  name    = "${var.name}-data-disk"
  project = "${var.project}"
  zone    = "${var.zone}"
  type    = "pd-standard"
  size    = "50"
}
# Reserved address used as the NAT IP in the Prometheus instance's
# access_config.
resource "google_compute_address" "prometheus-address" {
  count = "${var.create_resources}"

  name    = "${var.name}-address"
  project = "${var.project}"
  region  = "${var.region}"
}
# Reserved INTERNAL address used as the Prometheus instance's network_ip.
resource "google_compute_address" "prometheus-internal-address" {
  count = "${var.create_resources}"

  name         = "${var.name}-internal-address"
  project      = "${var.project}"
  region       = "${var.region}"
  address_type = "INTERNAL"
}
locals {
  # Service account the instance runs as. Outside the "misc" workspace the
  # caller-supplied account is used; in "misc" it is the account created by
  # this module (concat with list("") yields "" when none was created).
  service_account = "${terraform.workspace != "misc" ? var.prom_service_acct : element(concat(google_service_account.prometheus.*.email, list("")), 0)}"
}
# Compute Engine VM for Prometheus. It boots from var.boot_image, attaches the
# module's data disk and reserved addresses, and receives the rendered
# cloud-init document as user-data.
resource "google_compute_instance" "prometheus-server" {
  count = "${var.create_resources}"

  name                      = "${var.name}"
  project                   = "${var.project}"
  zone                      = "${var.zone}"
  machine_type              = "${var.instance_type}"
  allow_stopping_for_update = true

  labels {
    type    = "prometheus"
    name    = "${var.name}"
    network = "${var.network}"
  }

  service_account {
    email = "${local.service_account}"

    scopes = [
      "https://www.googleapis.com/auth/compute.readonly",
      "https://www.googleapis.com/auth/devstorage.read_only",
      "https://www.googleapis.com/auth/pubsub",
    ]
  }

  boot_disk {
    initialize_params {
      image = "${var.boot_image}"
      size  = "10"
    }
  }

  # Data disk created by google_compute_disk.prometheus-data.
  # NOTE(review): device_name "data" is presumably what the cloud-init
  # template addresses as /dev/disk/by-id/google-data - confirm.
  attached_disk {
    device_name = "data"
    source      = "${element(google_compute_disk.prometheus-data.*.name, count.index)}"
  }

  network_interface {
    network    = "${data.google_compute_network.default.self_link}"
    network_ip = "${element(google_compute_address.prometheus-internal-address.*.address, count.index)}"

    access_config {
      nat_ip = "${element(google_compute_address.prometheus-address.*.address, count.index)}"
    }
  }

  metadata {
    "user-data" = "${data.template_cloudinit_config.prometheus.rendered}"
  }
}

View file

@ -0,0 +1,3 @@
# Email of the service account created by this module, or "" when the
# account was not created (the concat with list("") supplies the fallback).
output "prom_svc_acct" {
  value = "${element(concat(google_service_account.prometheus.*.email, list("")), 0)}"
}

View file

@ -0,0 +1,58 @@
# --- Variables with defaults -------------------------------------------------

# Image used to initialise the instance boot disk.
variable "boot_image" {
  type    = "string"
  default = "cos-cloud/cos-stable"
}

# Value written to the instance's "network" label.
variable "network" {
  type    = "string"
  default = "default"
}

# Passed to the cloud-init template as "retention".
variable "retention" {
  type    = "string"
  default = "31d"
}

# --- Required variables ------------------------------------------------------

# Project that owns the disk, addresses, instance and IAM binding.
variable "project" {
  type = "string"
}

# Base name for the instance, service account and derived resource names.
variable "name" {
  type = "string"
}

# Region of the reserved addresses.
variable "region" {
  type = "string"
}

# Zone of the data disk and the instance.
variable "zone" {
  type = "string"
}

# Machine type of the Prometheus instance.
variable "instance_type" {
  type = "string"
}

# Used as the count of every resource in this module.
variable "create_resources" {
  type = "string"
}

# Service account the instance runs as outside the "misc" workspace.
variable "prom_service_acct" {
  type = "string"
}

# Bare IP address allowed through the firewall rule (suffixed with /32).
variable "prom_allowed_source_ip" {
  type = "string"
}

# Passed to the cloud-init template as "opsgenie_key".
variable "opsgenie_key" {
  type = "string"
}

# Passed to the cloud-init template as "prom_docker".
variable "prom_docker" {
  type = "string"
}

# Passed to the cloud-init template as "node_exporter_docker".
variable "node_exporter_docker" {
  type = "string"
}

View file

@ -23,7 +23,7 @@ resource "google_compute_region_instance_group_manager" "tor" {
resource "google_compute_instance_template" "tor" {
name_prefix = "${var.name}-template-"
description = "This template is used to create ${var.name} instances."
machine_type = "${var.tor_instance_type}"
machine_type = "${var.instance_type}"
count = "${var.create_resources}"
labels {

View file

@ -24,7 +24,7 @@ variable "zone" {
type = "string"
}
variable "tor_instance_type" {
variable "instance_type" {
type = "string"
}
@ -67,4 +67,4 @@ variable "node_exporter_docker" {
variable "gcloud_docker" {
type = "string"
}
}

View file

@ -1,3 +1,7 @@
# Backend service exported by the blc module.
output "blc_backend_service" {
  value = "${module.blc.backend_service}"
}

# Service account email exported by the prometheus module (its
# prom_svc_acct output).
output "prom_sa" {
  value = "${module.prometheus.prom_svc_acct}"
}

View file

@ -51,8 +51,8 @@ variable "create_resources" {
}
variable "ssl_cert" {
type = "string"
default = ""
type = "list"
default = ["", ""]
}
variable "rpcuser" {
@ -66,8 +66,8 @@ variable "rpcpass" {
}
variable "host" {
type = "string"
default = ""
type = "list"
default = ["", ""]
}
variable "onion_host" {
@ -86,13 +86,8 @@ variable "zone" {
}
variable "instance_type" {
type = "string"
default = ""
}
variable "tor_instance_type" {
type = "string"
default = ""
type = "list"
default = ["", "", ""]
}
variable "timeout" {
@ -105,6 +100,11 @@ variable "prom_service_acct" {
default = ""
}
variable "prom_allowed_source_ip" {
type = "string"
default = ""
}
variable "opsgenie_key" {
type = "string"
default = ""
@ -129,7 +129,7 @@ variable "bitcoin_docker" {
variable "lightning_docker" {
type = "string"
default = "blockstream/lightningd@sha256:97f5e8e7a6574b05dcfd88debb2e7b1c6b40ff6d6ebfe911cf3134a2b47c5c26"
default = "blockstream/lightningd@sha256:8201324d6177691b359059005be34ee944bc07fc379550af3a99d76209eabc76"
}
variable "charge_docker" {
@ -147,6 +147,11 @@ variable "node_exporter_docker" {
default = "prom/node-exporter@sha256:55302581333c43d540db0e144cf9e7735423117a733cdec27716d87254221086"
}
variable "prom_docker" {
type = "string"
default = "blockstream/prometheus@sha256:cab8c2359ab187aa6c9e9c7fcfcc3060b62742417030a77862c747e091d3c6d6"
}
variable "gcloud_docker" {
type = "string"
default = "google/cloud-sdk@sha256:b0d0555efef6a566f42fc4f0d89be9e1d74aff4565e27bbd206405f759d3f2b0"