1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
# File 'manifests/node_exporter.pp', line 1
# @summary Deploys Prometheus node_exporter and wires up its scrapes and alerts.
#
# Behaviour depends on `$::prometheus::role`:
#
# * `master`: declares the node-level alerting rules, a static scrape config
#   for the exporters on the etcd nodes and, unless
#   `$::prometheus::external_scrape_targets_only` is set, applies the
#   node-exporter Kubernetes manifests plus a scrape config that reaches every
#   node through the API server proxy.
# * `etcd`: downloads the release tarball, unpacks it into
#   `/opt/node_exporter-<version>` and runs it as the `node-exporter` service.
#
# @param image
#   Container image name. Not referenced in this manifest; presumably consumed
#   by the `node-exporter-ds.yaml.erb` template (verify against the template).
# @param version
#   node_exporter release. Substituted for every `#VERSION#` placeholder in the
#   URL parameters and used to build the installation directory.
# @param download_url
#   URL template of the release tarball (`#VERSION#` is replaced).
# @param sha256sums_url
#   URL template of the release checksum file (`#VERSION#` is replaced).
# @param signature_url
#   URL template of the armored signature of the checksum file
#   (`#VERSION#` is replaced).
# @param port
#   Port the exporter is scraped on; used for the etcd static targets and for
#   the API-proxy metrics path. Left untyped to stay compatible with existing
#   callers.
class prometheus::node_exporter (
  String $image          = 'prom/node-exporter',
  String $version        = '0.15.2',
  String $download_url   = 'https://github.com/prometheus/node_exporter/releases/download/v#VERSION#/node_exporter-#VERSION#.linux-amd64.tar.gz',
  String $sha256sums_url = 'https://github.com/prometheus/node_exporter/releases/download/v#VERSION#/sha256sums.txt',
  String $signature_url  = 'https://releases.tarmak.io/signatures/node_exporter/#VERSION#/sha256sums.txt.asc',
  $port                  = 9100,
)
{
  include ::prometheus

  # Not used by any resource below; kept in scope because the ERB templates
  # rendered from this class can read class-scope variables (presumably they
  # do -- confirm against the templates before removing).
  $namespace = $::prometheus::namespace
  $ignored_mount_points = '^/(sys|proc|dev|host|etc)($|/)'

  # Setup deployment scrapes and rules for node_exporter
  if $::prometheus::role == 'master' {
    include ::prometheus::server

    $kubernetes_token_file = $::prometheus::server::kubernetes_token_file
    $kubernetes_ca_file = $::prometheus::server::kubernetes_ca_file

    prometheus::rule { 'NodeHighCPUUsage':
      expr        => '(100 - (avg(irate(node_cpu{mode="idle"}[5m])) WITHOUT (cpu) * 100)) > 80',
      for         => '5m',
      summary     => '{{$labels.instance}}: High CPU usage detected',
      description => '{{$labels.instance}}: CPU usage is above 80% (current value is: {{ $value }})',
    }

    prometheus::rule { 'NodeHighLoadAverage':
      expr        => '((node_load5 / count without (cpu, mode) (node_cpu{mode="system"})) > 3)',
      for         => '5m',
      summary     => '{{$labels.instance}}: High load average detected',
      description => '{{$labels.instance}}: 5 minute load average is {{$value}}',
    }

    # TODO: Alert if diskspace is running out in x hours
    prometheus::rule { 'NodeLowDiskSpace':
      expr        => '((node_filesystem_size - node_filesystem_free ) / node_filesystem_size * 100) > 75',
      for         => '2m',
      summary     => '{{$labels.instance}}: Low disk space',
      description => '{{$labels.instance}}: Disk usage is above 75% (current value is: {{ $value }}%)',
    }

    # TODO: Alert when swap is in use
    # (the rule below only fires once more than 75% of swap is consumed)
    prometheus::rule { 'NodeSwapEnabled':
      expr        => '(((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75',
      for         => '2m',
      summary     => '{{$labels.instance}}: Swap usage detected',
      description => '{{$labels.instance}}: Swap usage is above 75% (current value is: {{ $value }})',
    }

    prometheus::rule { 'NodeHighMemoryUsage':
      expr        => '(((node_memory_MemTotal-node_memory_MemFree-node_memory_Cached)/(node_memory_MemTotal)*100)) > 80',
      for         => '5m',
      summary     => '{{$labels.instance}}: High memory usage detected',
      description => '{{$labels.instance}}: Memory usage is above 80% (current value is: {{ $value }})',
    }

    # scrape node exporter running on etcd nodes
    # (one "<node>:<port>" target per entry of $::prometheus::etcd_cluster)
    $etcd_node_exporters = $::prometheus::etcd_cluster.map |$node| { "${node}:${port}" }

    prometheus::scrape_config { 'etcd-nodes-exporter':
      order  => 135,
      config => {
        'static_configs' => [{
          'targets' => $etcd_node_exporters,
          'labels'  => {'role' => 'etcd'},
        }],
      },
    }

    $external_scrape_targets_only = $::prometheus::external_scrape_targets_only

    # In-cluster resources are skipped when only external targets are scraped.
    if ! $external_scrape_targets_only {
      kubernetes::apply { 'node-exporter':
        manifests => [
          template('prometheus/prometheus-ns.yaml.erb'),
          template('prometheus/node-exporter-ds.yaml.erb'),
        ],
      }

      # scrape node exporter running on every kubernetes node (through api proxy)
      prometheus::scrape_config { 'kubernetes-nodes-exporter':
        order  => 130,
        config => {
          'kubernetes_sd_configs' => [{
            'role' => 'node',
          }],
          'tls_config'            => {
            'ca_file' => $kubernetes_ca_file,
          },
          'bearer_token_file'     => $kubernetes_token_file,
          'scheme'                => 'https',
          'relabel_configs'       => [{
            'action' => 'labelmap',
            'regex'  => '__meta_kubernetes_node_label_(.+)',
          }, {
            'target_label' => '__address__',
            'replacement'  => 'kubernetes.default.svc:443',
          }, {
            # "\${1}" is escaped so Puppet emits a literal ${1} for Prometheus
            # to expand; only ${port} is interpolated here.
            'source_labels' => ['__meta_kubernetes_node_name'],
            'regex'         => '(.+)',
            'target_label'  => '__metrics_path__',
            'replacement'   => "/api/v1/nodes/\${1}:${port}/proxy/metrics",
          }],
        },
      }
    }
  }

  # Setup node_exporter service on etcd nodes
  if $::prometheus::role == 'etcd' {
    # Expand the #VERSION# placeholder everywhere it occurs ('G' = global).
    $_download_url = regsubst($download_url, '#VERSION#', $version, 'G')
    $_signature_url = regsubst($signature_url, '#VERSION#', $version, 'G')
    $_sha256sums_url = regsubst($sha256sums_url, '#VERSION#', $version, 'G')

    # Versioned install directory, so each release unpacks into its own path.
    $dest_dir = "/opt/node_exporter-${version}"

    file { $dest_dir:
      ensure => directory,
      mode   => '0755',
    }
    -> archive { "${dest_dir}/node_exporter.tar.gz":
      ensure            => present,
      extract           => true,
      extract_path      => $dest_dir,
      # Drop the tarball's top-level directory so files land in $dest_dir.
      extract_command   => 'tar xfz %s --strip-components=1',
      source            => $_download_url,
      sha256sums        => $_sha256sums_url,
      signature_armored => $_signature_url,
      provider          => 'airworthy',
    }
    -> file { "${::prometheus::systemd_path}/node-exporter.service":
      ensure  => file,
      content => template('prometheus/node-exporter.service.erb'),
      notify  => Exec["${module_name}-systemctl-daemon-reload"],
    }
    # `~>` (not `->`): a changed unit file must restart the running exporter,
    # otherwise the old process keeps running with the previous definition.
    ~> service { 'node-exporter':
      ensure  => running,
      enable  => true,
      # The unit file refreshes the daemon-reload Exec (declared outside this
      # class); order the service after it so it is never started or
      # restarted against a unit definition systemd has not re-read yet.
      require => Exec["${module_name}-systemctl-daemon-reload"],
    }
  }
}
|