mirror of
https://gitea.com/mcereda/oam.git
synced 2026-02-09 05:44:23 +00:00
chore: improve notes about prometheus and grafana
This commit is contained in:
4
.vscode/settings.json
vendored
4
.vscode/settings.json
vendored
@@ -120,6 +120,9 @@
|
|||||||
"csma",
|
"csma",
|
||||||
"daemonless",
|
"daemonless",
|
||||||
"datagram",
|
"datagram",
|
||||||
|
"datasource",
|
||||||
|
"datasources",
|
||||||
|
"dblab",
|
||||||
"deluser",
|
"deluser",
|
||||||
"devel",
|
"devel",
|
||||||
"dhclient",
|
"dhclient",
|
||||||
@@ -230,6 +233,7 @@
|
|||||||
"pinentry",
|
"pinentry",
|
||||||
"pipx",
|
"pipx",
|
||||||
"pkexec",
|
"pkexec",
|
||||||
|
"pkill",
|
||||||
"polkit",
|
"polkit",
|
||||||
"portsnap",
|
"portsnap",
|
||||||
"posix",
|
"posix",
|
||||||
|
|||||||
@@ -1,78 +0,0 @@
|
|||||||
# Grafana on Kubernetes using Helm
|
|
||||||
|
|
||||||
## Table of contents <!-- omit in toc -->
|
|
||||||
|
|
||||||
1. [Usage](#usage)
|
|
||||||
1. [Gotchas](#gotchas)
|
|
||||||
1. [Further readings](#further-readings)
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
Installation:
|
|
||||||
|
|
||||||
1. add the repository:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
helm repo add grafana https://grafana.github.io/helm-charts
|
|
||||||
helm repo update
|
|
||||||
```
|
|
||||||
|
|
||||||
1. install the release:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
helm upgrade --install --namespace monitoring --create-namespace grafana grafana/grafana
|
|
||||||
```
|
|
||||||
|
|
||||||
Get the admin user's password:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
kubectl get secret --namespace monitoring grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
|
|
||||||
```
|
|
||||||
|
|
||||||
The Grafana server can be accessed via port 80 on `grafana.monitoring.svc.cluster.local` from within the cluster.<br/>
|
|
||||||
To get the external URL:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
export POD_NAME=$(kubectl get pods --namespace monitoring -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana" -o jsonpath="{.items[0].metadata.name}")
|
|
||||||
kubectl --namespace monitoring port-forward "${POD_NAME}" 3000
|
|
||||||
```
|
|
||||||
|
|
||||||
Clean up:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
helm delete --namespace monitoring grafana
|
|
||||||
kubectl delete namespace --ignore-not-found monitoring
|
|
||||||
```
|
|
||||||
|
|
||||||
Access a Prometheus instance in the same namespace using `http://prometheus-server`
|
|
||||||
|
|
||||||
Get the default values from the updated chart
|
|
||||||
|
|
||||||
```sh
|
|
||||||
helm inspect values grafana/grafana > "$(git rev-parse --show-toplevel)/kubernetes/helm/grafana/values.yaml"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Gotchas
|
|
||||||
|
|
||||||
Useful dashboards:
|
|
||||||
|
|
||||||
- `3662`: prometheus 2.0 overview
|
|
||||||
- `6417`: kubernetes cluster (prometheus)
|
|
||||||
- `9632`: nextcloud
|
|
||||||
|
|
||||||
## Further readings
|
|
||||||
|
|
||||||
- Official [helm chart]
|
|
||||||
- [Set up prometheus and ingress on kubernetes]
|
|
||||||
- [How to integrate Prometheus and Grafana on Kubernetes using Helm]
|
|
||||||
|
|
||||||
<!--
|
|
||||||
References
|
|
||||||
-->
|
|
||||||
|
|
||||||
<!-- Upstream -->
|
|
||||||
[helm chart]: https://github.com/grafana/helm-charts/tree/main/charts/grafana
|
|
||||||
|
|
||||||
<!-- Others -->
|
|
||||||
[how to integrate prometheus and grafana on kubernetes using helm]: https://semaphoreci.com/blog/prometheus-grafana-kubernetes-helm
|
|
||||||
[set up prometheus and ingress on kubernetes]: https://blog.gojekengineering.com/diy-how-to-set-up-prometheus-and-ingress-on-kubernetes-d395248e2ba
|
|
||||||
@@ -1,72 +0,0 @@
|
|||||||
# Prometheus on Kubernetes using Helm
|
|
||||||
|
|
||||||
## Table of contents <!-- omit in toc -->
|
|
||||||
|
|
||||||
1. [Usage](#usage)
|
|
||||||
1. [Further readings](#further-readings)
|
|
||||||
1. [Sources](#sources)
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
Installation:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
helm repo add \
|
|
||||||
'prometheus-community' 'https://prometheus-community.github.io/helm-charts'
|
|
||||||
helm upgrade --install \
|
|
||||||
--namespace 'monitoring' --create-namespace \
|
|
||||||
'prometheus' 'prometheus-community/prometheus'
|
|
||||||
```
|
|
||||||
|
|
||||||
The server can be accessed via port 80 on `prometheus-server.monitoring.svc.cluster.local` from within the cluster.
|
|
||||||
|
|
||||||
Get the server URL:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
export POD_NAME=$(kubectl get pods --namespace monitoring -l "app=prometheus,component=server" -o jsonpath="{.items[0].metadata.name}")
|
|
||||||
kubectl --namespace monitoring port-forward $POD_NAME 9090
|
|
||||||
```
|
|
||||||
|
|
||||||
Alertmanager can be accessed via port 80 on `prometheus-alertmanager.monitoring.svc.cluster.local` from within the cluster
|
|
||||||
|
|
||||||
Get Alertmanager's URL:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
export POD_NAME=$(kubectl get pods --namespace monitoring -l "app=prometheus,component=alertmanager" -o jsonpath="{.items[0].metadata.name}")
|
|
||||||
kubectl --namespace monitoring port-forward $POD_NAME 9093
|
|
||||||
```
|
|
||||||
|
|
||||||
PushGateway can be accessed via port 9091 on `prometheus-pushgateway.monitoring.svc.cluster.local` from within the cluster
|
|
||||||
|
|
||||||
Get PushGateway's URL:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
export POD_NAME=$(kubectl get pods --namespace monitoring -l "app=prometheus,component=pushgateway" -o jsonpath="{.items[0].metadata.name}")
|
|
||||||
```
|
|
||||||
|
|
||||||
## Further readings
|
|
||||||
|
|
||||||
- [Helm chart]
|
|
||||||
|
|
||||||
## Sources
|
|
||||||
|
|
||||||
All the references in the [further readings] section, plus the following:
|
|
||||||
|
|
||||||
- [Install Prometheus and Grafana with helm 3 on a local machine VM]
|
|
||||||
- [Set up prometheus and ingress on kubernetes]
|
|
||||||
- [How to integrate Prometheus and Grafana on Kubernetes using Helm]
|
|
||||||
|
|
||||||
<!--
|
|
||||||
References
|
|
||||||
-->
|
|
||||||
|
|
||||||
<!-- Upstream -->
|
|
||||||
[helm chart]: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus
|
|
||||||
|
|
||||||
<!-- In-article sections -->
|
|
||||||
[further readings]: #further-readings
|
|
||||||
|
|
||||||
<!-- Others -->
|
|
||||||
[how to integrate prometheus and grafana on kubernetes using helm]: https://semaphoreci.com/blog/prometheus-grafana-kubernetes-helm
|
|
||||||
[install prometheus and grafana with helm 3 on a local machine vm]: https://dev.to/ko_kamlesh/install-prometheus-grafana-with-helm-3-on-local-machine-vm-1kgj
|
|
||||||
[set up prometheus and ingress on kubernetes]: https://blog.gojekengineering.com/diy-how-to-set-up-prometheus-and-ingress-on-kubernetes-d395248e2ba
|
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
##
|
##
|
||||||
# Source:
|
# Source:
|
||||||
# `helm template --namespace 'gitlab' 'gitlab-runner' 'gitlab/gitlab-runner'`
|
# `helm show values --repo 'https://charts.gitlab.io' 'gitlab-runner'`
|
||||||
##
|
##
|
||||||
|
|
||||||
useTini: false
|
useTini: false
|
||||||
|
|||||||
6
examples/kubernetes/values.prometheus.yaml
Normal file
6
examples/kubernetes/values.prometheus.yaml
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
---
|
||||||
|
|
||||||
|
##
|
||||||
|
# Source:
|
||||||
|
# `helm show values --repo 'https://prometheus-community.github.io/helm-charts' 'prometheus'`
|
||||||
|
##
|
||||||
@@ -2,9 +2,8 @@
|
|||||||
|
|
||||||
Open-source platform for monitoring and observability.
|
Open-source platform for monitoring and observability.
|
||||||
|
|
||||||
## Table of contents <!-- omit in toc -->
|
|
||||||
|
|
||||||
1. [TL;DR](#tldr)
|
1. [TL;DR](#tldr)
|
||||||
|
1. [Installation](#installation)
|
||||||
1. [Provisioning](#provisioning)
|
1. [Provisioning](#provisioning)
|
||||||
1. [Datasources](#datasources)
|
1. [Datasources](#datasources)
|
||||||
1. [Dashboards](#dashboards)
|
1. [Dashboards](#dashboards)
|
||||||
@@ -28,18 +27,57 @@ curl -sS \
|
|||||||
"
|
"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>kubernetes</summary>
|
||||||
|
|
||||||
|
```sh
|
||||||
|
helm repo add 'grafana' 'https://grafana.github.io/helm-charts'
|
||||||
|
helm -n 'monitoring' upgrade -i --create-namespace 'grafana' 'grafana/grafana'
|
||||||
|
|
||||||
|
helm -n 'monitoring' upgrade -i --create-namespace --repo 'https://grafana.github.io/helm-charts' 'grafana' 'grafana'
|
||||||
|
```
|
||||||
|
|
||||||
|
Access components:
|
||||||
|
|
||||||
|
| Component | From within the cluster |
|
||||||
|
| --------- | ----------------------------------------- |
|
||||||
|
| Server | `grafana.monitoring.svc.cluster.local:80` |
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# Access the server
|
||||||
|
kubectl -n 'monitoring' get secret 'grafana' -o jsonpath='{.data.admin-password}' | base64 --decode
|
||||||
|
kubectl -n 'monitoring' get pods -l 'app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana' \
|
||||||
|
-o jsonpath='{.items[0].metadata.name}' \
|
||||||
|
| xargs -I {} kubectl -n 'monitoring' port-forward {} 3000
|
||||||
|
```
|
||||||
|
|
||||||
|
Clean up:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
helm -n 'monitoring' delete 'grafana'
|
||||||
|
kubectl delete namespace --ignore-not-found 'monitoring'
|
||||||
|
```
|
||||||
|
|
||||||
|
Access Prometheus instances in the same namespace using `http://prometheus-server`
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
## Provisioning
|
## Provisioning
|
||||||
|
|
||||||
See [provision dashboards and data sources] for details.
|
See [provision dashboards and data sources] for details.
|
||||||
|
|
||||||
### Datasources
|
### Datasources
|
||||||
|
|
||||||
Data sources can be managed automatically at provisioning by adding YAML configuration files in the `provisioning/datasources` directory.
|
Data sources can be managed automatically at provisioning by adding YAML configuration files in the
|
||||||
|
`provisioning/datasources` directory.
|
||||||
|
|
||||||
Each configuration file can contain a list of `datasources` to add or update during startup.<br/>
|
Each configuration file can contain a list of `datasources` to add or update during startup.<br/>
|
||||||
If the data source already exists, Grafana reconfigures it to match the provisioned configuration file.
|
If the data source already exists, Grafana reconfigures it to match the provisioned configuration file.
|
||||||
|
|
||||||
Grafana also deletes the data sources listed in `deleteDatasources` before adding or updating those in the `datasources` list.
|
Grafana also deletes the data sources listed in `deleteDatasources` before adding or updating those in the `datasources`
|
||||||
|
list.
|
||||||
|
|
||||||
```yml
|
```yml
|
||||||
---
|
---
|
||||||
@@ -68,9 +106,11 @@ The easiest way to write datasources definitions in the configuration file is to
|
|||||||
1. Login to Grafana as `admin`
|
1. Login to Grafana as `admin`
|
||||||
1. Manually setup the datasource
|
1. Manually setup the datasource
|
||||||
1. Issue a `GET /api/datasources` request to Grafana's API to get the datasource configuration
|
1. Issue a `GET /api/datasources` request to Grafana's API to get the datasource configuration
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
curl -sS 'http://grafana:3000/api/datasources' -H 'Authorization: Basic YWRtaW46YWRtaW4='
|
curl -sS 'http://grafana:3000/api/datasources' -H 'Authorization: Basic YWRtaW46YWRtaW4='
|
||||||
```
|
```
|
||||||
|
|
||||||
1. Edit it as YAML
|
1. Edit it as YAML
|
||||||
1. Drop the YAML definition into the `provisioning/datasources` directory
|
1. Drop the YAML definition into the `provisioning/datasources` directory
|
||||||
|
|
||||||
@@ -100,11 +140,15 @@ datasources:
|
|||||||
|
|
||||||
### Dashboards
|
### Dashboards
|
||||||
|
|
||||||
Dashboards can be automatically managed by adding one or more YAML config files in the `provisioning/dashboards` directory.<br/>
|
Dashboards can be automatically managed by adding one or more YAML config files in the `provisioning/dashboards`
|
||||||
Each config file can contain a list of dashboards `providers` that load dashboards into Grafana from the local filesystem.
|
directory.<br/>
|
||||||
|
Each config file can contain a list of dashboards `providers` that load dashboards into Grafana from the local
|
||||||
|
filesystem.
|
||||||
|
|
||||||
When Grafana starts, it will insert all dashboards available in the configured path, or update them if they are already present.<br/>
|
When Grafana starts, it will insert all dashboards available in the configured path, or update them if they are already
|
||||||
Later on it will poll that path every `updateIntervalSeconds`, look for updated json files and update/insert those into the database.
|
present.<br/>
|
||||||
|
Later on it will poll that path every `updateIntervalSeconds`, look for updated json files and update/insert those into
|
||||||
|
the database.
|
||||||
|
|
||||||
```yml
|
```yml
|
||||||
apiVersion: 1
|
apiVersion: 1
|
||||||
@@ -137,10 +181,13 @@ $ curl -sS \
|
|||||||
|
|
||||||
## Dashboards of interest
|
## Dashboards of interest
|
||||||
|
|
||||||
| Name | Grafana ID | URLs |
|
| Name | Grafana ID | URLs |
|
||||||
| ------------------ | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------------------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| Node exporter full | 1860 | [grafana](https://grafana.com/grafana/dashboards/1860-node-exporter-full/), [github raw](https://raw.githubusercontent.com/rfmoz/grafana-dashboards/master/prometheus/node-exporter-full.json) |
|
| Node exporter full | 1860 | [grafana](https://grafana.com/grafana/dashboards/1860-node-exporter-full/), [github raw](https://raw.githubusercontent.com/rfmoz/grafana-dashboards/master/prometheus/node-exporter-full.json) |
|
||||||
| OpenWRT | 11147 | [grafana](https://grafana.com/grafana/dashboards/11147-openwrt/) |
|
| OpenWRT | 11147 | [grafana](https://grafana.com/grafana/dashboards/11147-openwrt/) |
|
||||||
|
| prometheus 2.0 overview | 3662 | FIXME |
|
||||||
|
| kubernetes cluster (prometheus) | 6417 | FIXME |
|
||||||
|
| Nextcloud | 9632 | FIXME |
|
||||||
|
|
||||||
## Further readings
|
## Further readings
|
||||||
|
|
||||||
@@ -148,7 +195,8 @@ $ curl -sS \
|
|||||||
- [Github]
|
- [Github]
|
||||||
- [HTTP API reference]
|
- [HTTP API reference]
|
||||||
- [Prometheus]
|
- [Prometheus]
|
||||||
- [`docker/monitoring`][docker/monitoring]
|
- [`containers/monitoring`][containers/monitoring]
|
||||||
|
- Official [helm chart]
|
||||||
|
|
||||||
## Sources
|
## Sources
|
||||||
|
|
||||||
@@ -157,19 +205,14 @@ All the references in the [further readings] section, plus the following:
|
|||||||
- [Provisioning]
|
- [Provisioning]
|
||||||
- [Provision dashboards and data sources]
|
- [Provision dashboards and data sources]
|
||||||
- [Data source on startup]
|
- [Data source on startup]
|
||||||
|
- [Set up prometheus and ingress on kubernetes]
|
||||||
|
- [How to integrate Prometheus and Grafana on Kubernetes using Helm]
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
References
|
Reference
|
||||||
|
═╬═Time══
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<!-- Upstream -->
|
|
||||||
[data source on startup]: https://community.grafana.com/t/data-source-on-startup/8618/2
|
|
||||||
[github]: https://github.com/grafana/grafana
|
|
||||||
[http api reference]: https://grafana.com/docs/grafana/latest/developers/http_api/
|
|
||||||
[provision dashboards and data sources]: https://grafana.com/tutorials/provision-dashboards-and-data-sources/
|
|
||||||
[provisioning]: https://grafana.com/docs/grafana/latest/administration/provisioning/
|
|
||||||
[website]: https://grafana.com
|
|
||||||
|
|
||||||
<!-- In-article sections -->
|
<!-- In-article sections -->
|
||||||
[datasources provisioning]: #datasources
|
[datasources provisioning]: #datasources
|
||||||
[further readings]: #further-readings
|
[further readings]: #further-readings
|
||||||
@@ -178,4 +221,17 @@ All the references in the [further readings] section, plus the following:
|
|||||||
[prometheus]: prometheus.md
|
[prometheus]: prometheus.md
|
||||||
|
|
||||||
<!-- Files -->
|
<!-- Files -->
|
||||||
[docker/monitoring]: ../docker/monitoring/README.md
|
[containers/monitoring]: ../containers/monitoring/README.md
|
||||||
|
|
||||||
|
<!-- Upstream -->
|
||||||
|
[data source on startup]: https://community.grafana.com/t/data-source-on-startup/8618/2
|
||||||
|
[github]: https://github.com/grafana/grafana
|
||||||
|
[http api reference]: https://grafana.com/docs/grafana/latest/developers/http_api/
|
||||||
|
[provision dashboards and data sources]: https://grafana.com/tutorials/provision-dashboards-and-data-sources/
|
||||||
|
[provisioning]: https://grafana.com/docs/grafana/latest/administration/provisioning/
|
||||||
|
[website]: https://grafana.com
|
||||||
|
[helm chart]: https://github.com/grafana/helm-charts/tree/main/charts/grafana
|
||||||
|
|
||||||
|
<!-- Others -->
|
||||||
|
[how to integrate prometheus and grafana on kubernetes using helm]: https://semaphoreci.com/blog/prometheus-grafana-kubernetes-helm
|
||||||
|
[set up prometheus and ingress on kubernetes]: https://blog.gojekengineering.com/diy-how-to-set-up-prometheus-and-ingress-on-kubernetes-d395248e2ba
|
||||||
|
|||||||
@@ -1,20 +1,43 @@
|
|||||||
# Prometheus
|
# Prometheus
|
||||||
|
|
||||||
Monitoring and alerting system that collects metrics from configured targets at given intervals, evaluates rule
|
Monitoring and alerting system that collects metrics from configured targets at given intervals, evaluates rule
|
||||||
expressions, displays the results, and can trigger alerts when specified conditions are observed.<br/>
|
expressions, displays the results, and can trigger alerts when specified conditions are observed.
|
||||||
Metrics can also be pushed using plugins, in the event hosts are behind a firewall or prohibited from opening ports by
|
|
||||||
|
Metrics can be pushed using plugins, in the event hosts are behind a firewall or prohibited from opening ports by
|
||||||
security policy.
|
security policy.
|
||||||
|
|
||||||
## Table of contents <!-- omit in toc -->
|
1. [TL;DR](#tldr)
|
||||||
|
|
||||||
1. [Components](#components)
|
1. [Components](#components)
|
||||||
1. [Extras](#extras)
|
1. [Extras](#extras)
|
||||||
|
1. [Installation](#installation)
|
||||||
1. [Configuration](#configuration)
|
1. [Configuration](#configuration)
|
||||||
|
1. [Filter metrics](#filter-metrics)
|
||||||
1. [Queries](#queries)
|
1. [Queries](#queries)
|
||||||
1. [Filter metrics](#filter-metrics)
|
1. [Storage](#storage)
|
||||||
|
1. [Local storage](#local-storage)
|
||||||
|
1. [External storage](#external-storage)
|
||||||
|
1. [Backfilling](#backfilling)
|
||||||
|
1. [Management API](#management-api)
|
||||||
|
1. [Take snapshots of the data](#take-snapshots-of-the-data)
|
||||||
1. [Further readings](#further-readings)
|
1. [Further readings](#further-readings)
|
||||||
1. [Sources](#sources)
|
1. [Sources](#sources)
|
||||||
|
|
||||||
|
## TL;DR
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# Start the process.
|
||||||
|
prometheus
|
||||||
|
prometheus --web.enable-admin-api
|
||||||
|
|
||||||
|
# Reload the configuration file without restarting the process.
|
||||||
|
kill -s 'SIGHUP' '3969'
|
||||||
|
pkill --signal 'HUP' 'prometheus'
|
||||||
|
|
||||||
|
# Shut down the process *gracefully*.
|
||||||
|
kill -s 'SIGTERM' '3969'
|
||||||
|
pkill --signal 'TERM' 'prometheus'
|
||||||
|
```
|
||||||
|
|
||||||
## Components
|
## Components
|
||||||
|
|
||||||
Prometheus is composed by its **server**, the **Alertmanager** and its **exporters**.
|
Prometheus is composed by its **server**, the **Alertmanager** and its **exporters**.
|
||||||
@@ -33,10 +56,62 @@ Prometheus without the need of an agent.
|
|||||||
As welcomed addition, [Grafana] can be configured to use Prometheus as a backend of its in order to provide data
|
As welcomed addition, [Grafana] can be configured to use Prometheus as a backend of its in order to provide data
|
||||||
visualization and dashboarding functions on the data it provides.
|
visualization and dashboarding functions on the data it provides.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```sh
|
||||||
|
brew install 'prometheus'
|
||||||
|
docker run -p '9090:9090' -v './prometheus.yml:/etc/prometheus/prometheus.yml' --name prometheus -d 'prom/prometheus'
|
||||||
|
```
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>Kubernetes</summary>
|
||||||
|
|
||||||
|
```sh
|
||||||
|
helm repo add 'prometheus-community' 'https://prometheus-community.github.io/helm-charts'
|
||||||
|
helm -n 'monitoring' upgrade -i --create-namespace 'prometheus' 'prometheus-community/prometheus'
|
||||||
|
|
||||||
|
helm -n 'monitoring' upgrade -i --create-namespace --repo 'https://prometheus-community.github.io/helm-charts' \
|
||||||
|
'prometheus' 'prometheus'
|
||||||
|
```
|
||||||
|
|
||||||
|
Access components:
|
||||||
|
|
||||||
|
| Component | From within the cluster |
|
||||||
|
| ----------------- | --------------------------------------------------------- |
|
||||||
|
| Prometheus server | `prometheus-server.monitoring.svc.cluster.local:80` |
|
||||||
|
| Alertmanager | `prometheus-alertmanager.monitoring.svc.cluster.local:80` |
|
||||||
|
| Push gateway | `prometheus-pushgateway.monitoring.svc.cluster.local:9091` |
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# Access the prometheus server.
|
||||||
|
kubectl -n 'monitoring' get pods -l 'app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus' \
|
||||||
|
-o jsonpath='{.items[0].metadata.name}' \
|
||||||
|
| xargs -I {} kubectl -n 'monitoring' port-forward {} 9090
|
||||||
|
|
||||||
|
# Access alertmanager.
|
||||||
|
kubectl -n 'monitoring' get pods -l 'app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus' \
|
||||||
|
-o jsonpath='{.items[0].metadata.name}' \
|
||||||
|
| xargs -I {} kubectl -n 'monitoring' port-forward {} 9093
|
||||||
|
|
||||||
|
# Access the push gateway.
|
||||||
|
kubectl -n 'monitoring' get pods -l "app=prometheus-pushgateway,component=pushgateway" \
|
||||||
|
-o jsonpath='{.items[0].metadata.name}' \
|
||||||
|
| xargs -I {} kubectl -n 'monitoring' port-forward {} 9091
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
The default configuration file is at `/etc/prometheus/prometheus.yml`.
|
The default configuration file is at `/etc/prometheus/prometheus.yml`.
|
||||||
|
|
||||||
|
Reload the configuration without restarting Prometheus's process by using the `SIGHUP` signal:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
kill -s 'SIGHUP' '3969'
|
||||||
|
pkill --signal 'HUP' 'prometheus'
|
||||||
|
```
|
||||||
|
|
||||||
```yml
|
```yml
|
||||||
global:
|
global:
|
||||||
scrape_interval: 15s
|
scrape_interval: 15s
|
||||||
@@ -58,6 +133,36 @@ scrape_configs:
|
|||||||
regex: '(node_cpu)'
|
regex: '(node_cpu)'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Filter metrics
|
||||||
|
|
||||||
|
Refer to [How relabeling in Prometheus works], [Scrape selective metrics in Prometheus] and
|
||||||
|
[Dropping metrics at scrape time with Prometheus].
|
||||||
|
|
||||||
|
Use [metric relabeling configurations][metric_relabel_configs] to select which series to ingest **after** scraping:
|
||||||
|
|
||||||
|
```diff
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: router
|
||||||
|
…
|
||||||
|
+ metric_relabel_configs:
|
||||||
|
+ - # do *not* record metrics which name matches the regex
|
||||||
|
+ # in this case, those which name starts with 'node_disk_'
|
||||||
|
+ source_labels: [ __name__ ]
|
||||||
|
+ action: drop
|
||||||
|
+ regex: node_disk_.*
|
||||||
|
- job_name: hosts
|
||||||
|
…
|
||||||
|
+ metric_relabel_configs:
|
||||||
|
+ - # *only* record metrics which name matches the regex
|
||||||
|
+ # in this case, those which name starts with 'node_cpu_' with cpu=1 and mode=user
|
||||||
|
+ source_labels:
|
||||||
|
+ - __name__
|
||||||
|
+ - cpu
|
||||||
|
+ - mode
|
||||||
|
+ regex: node_cpu_.*1.*user.*
|
||||||
|
+ action: keep
|
||||||
|
```
|
||||||
|
|
||||||
## Queries
|
## Queries
|
||||||
|
|
||||||
Prometheus' query syntax is [PromQL].
|
Prometheus' query syntax is [PromQL].
|
||||||
@@ -95,40 +200,155 @@ calculates the **per-second rate of change** based on the last two data points o
|
|||||||
To calculate the overall CPU usage, the idle mode of the metric is used. Since idle percent of a processor is the
|
To calculate the overall CPU usage, the idle mode of the metric is used. Since idle percent of a processor is the
|
||||||
opposite of a busy processor, the irate value is subtracted from 1. To make it a percentage, it is multiplied by 100.
|
opposite of a busy processor, the irate value is subtracted from 1. To make it a percentage, it is multiplied by 100.
|
||||||
|
|
||||||
## Filter metrics
|
## Storage
|
||||||
|
|
||||||
Refer [How relabeling in Prometheus works], [Scrape selective metrics in Prometheus] and
|
Refer to [Storage].
|
||||||
[Dropping metrics at scrape time with Prometheus].
|
|
||||||
|
|
||||||
Use [metric relabeling configurations][metric_relabel_configs] to select which series to ingest **after** scraping:
|
Prometheus uses a local on-disk time series database, but can optionally integrate with remote storage systems.
|
||||||
|
|
||||||
```diff
|
### Local storage
|
||||||
scrape_configs:
|
|
||||||
- job_name: router
|
Local storage is **not** clustered **nor** replicated. This makes it not arbitrarily scalable or durable in the face of
|
||||||
…
|
outages.<br/>
|
||||||
+ metric_relabel_configs:
|
The use of RAID disks is suggested for storage availability, and snapshots are recommended for backups.
|
||||||
+ - # do *not* record metrics which name matches the regex
|
|
||||||
+ # in this case, those which name starts with 'node_disk_'
|
> The local storage is **not** intended to be durable long-term storage and external solutions should be used to achieve
|
||||||
+ source_labels: [ __name__ ]
|
> extended retention and data durability.
|
||||||
+ action: drop
|
|
||||||
+ regex: node_disk_.*
|
External storage may be used via the remote read/write APIs.<br/>
|
||||||
- job_name: hosts
|
These storage systems vary greatly in durability, performance, and efficiency.
|
||||||
…
|
|
||||||
+ metric_relabel_configs:
|
Ingested samples are grouped into blocks of two hours.<br/>
|
||||||
+ - # *only* record metrics which name matches the regex
|
Each two-hour block consists of a uniquely named directory. This contains:
|
||||||
+ # in this case, those which name starts with 'node_cpu_' with cpu=1 and mode=user
|
|
||||||
+ source_labels:
|
- A `chunks` subdirectory, hosting all the time series samples for that window of time.<br/>
|
||||||
+ - __name__
|
Samples are grouped into one or more segment files of up to 512MB each by default.
|
||||||
+ - cpu
|
- A metadata file.
|
||||||
+ - mode
|
- An index file.<br/>
|
||||||
+ regex: node_cpu_.*1.*user.*
|
This indexes metric names and labels to time series in the `chunks` directory.
|
||||||
+ action: keep
|
|
||||||
|
When series are deleted via the API, deletion records are stored in separate `tombstones` files and are **not** deleted
|
||||||
|
immediately from the chunk segments.
|
||||||
|
|
||||||
|
The current block for incoming samples is kept in memory and is **not** fully persisted.<br/>
|
||||||
|
This is secured against crashes by a write-ahead log (WAL) that can be replayed when the Prometheus server restarts.
|
||||||
|
|
||||||
|
Write-ahead log files are stored in the `wal` directory in segments of 128MB in size.<br/>
|
||||||
|
These files contain raw data that has not yet been _compacted_.<br/>
|
||||||
|
Prometheus will retain a minimum of three write-ahead log files. Servers may retain more than three WAL files in order
|
||||||
|
to keep at least two hours of raw data stored.
|
||||||
|
|
||||||
|
The server's `data` directory looks something like the following:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
./data
|
||||||
|
├── 01BKGV7JBM69T2G1BGBGM6KB12
|
||||||
|
│ └── meta.json
|
||||||
|
├── 01BKGTZQ1SYQJTR4PB43C8PD98
|
||||||
|
│ ├── chunks
|
||||||
|
│ │ └── 000001
|
||||||
|
│ ├── tombstones
|
||||||
|
│ ├── index
|
||||||
|
│ └── meta.json
|
||||||
|
├── 01BKGTZQ1HHWHV8FBJXW1Y3W0K
|
||||||
|
│ └── meta.json
|
||||||
|
├── 01BKGV7JC0RY8A6MACW02A2PJD
|
||||||
|
│ ├── chunks
|
||||||
|
│ │ └── 000001
|
||||||
|
│ ├── tombstones
|
||||||
|
│ ├── index
|
||||||
|
│ └── meta.json
|
||||||
|
├── chunks_head
|
||||||
|
│ └── 000001
|
||||||
|
└── wal
|
||||||
|
├── 000000002
|
||||||
|
└── checkpoint.00000001
|
||||||
|
└── 00000000
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The initial two-hour blocks are eventually compacted into longer blocks in the background.<br/>
|
||||||
|
Each block will contain data spanning up to 10% of the retention time or 31 days, whichever is smaller.
|
||||||
|
|
||||||
|
The retention time defaults to 15 days.<br/>
|
||||||
|
Expired block cleanup happens in the background. It may take up to two hours to remove expired blocks. Blocks must be
|
||||||
|
**fully** expired before they are removed.
|
||||||
|
|
||||||
|
Prometheus stores an average of 1-2 bytes per sample.<br/>
|
||||||
|
To plan the capacity of a Prometheus server, one can use the following rough formula:
|
||||||
|
|
||||||
|
```plaintext
|
||||||
|
needed_disk_space = retention_time_seconds * ingested_samples_per_second * bytes_per_sample
|
||||||
|
```
|
||||||
|
|
||||||
|
To lower the rate of ingested samples one can:
|
||||||
|
|
||||||
|
- Either reduce the number of time series scraped (fewer targets or fewer series per target)
|
||||||
|
- Or increase the scrape interval.
|
||||||
|
|
||||||
|
Reducing the number of series is likely more effective, due to compression of samples within a series.
|
||||||
|
|
||||||
|
If the local storage becomes corrupted for whatever reason, the best strategy is to shut down Prometheus and then remove
|
||||||
|
the entire storage directory. This means losing **all** the stored data.<br/>
|
||||||
|
One can alternatively try removing individual block directories or the WAL directory to resolve the problem. Doing so
|
||||||
|
means losing approximately two hours of data per block directory.
|
||||||
|
|
||||||
|
> Prometheus does **not** support non-POSIX-compliant filesystems as local storage.<br/>
|
||||||
|
> Unrecoverable corruptions may happen.<br/>
|
||||||
|
> NFS filesystems (including AWS's EFS) are not supported as, though NFS could be POSIX-compliant, most of its
|
||||||
|
> implementations are not.<br/>
|
||||||
|
> It is strongly recommended to use a local filesystem for reliability.
|
||||||
|
|
||||||
|
If both time and size retention policies are specified, whichever triggers first will take precedence.
|
||||||
|
|
||||||
|
### External storage
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
### Backfilling
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
## Management API
|
||||||
|
|
||||||
|
### Take snapshots of the data
|
||||||
|
|
||||||
|
> Requires the TSDB APIs to be enabled (`--web.enable-admin-api`).
|
||||||
|
|
||||||
|
Use the `snapshot` API endpoint to create snapshots of all current data into `snapshots/<datetime>-<rand>` under the
|
||||||
|
TSDB's data directory and return that directory as response.
|
||||||
|
|
||||||
|
It will optionally skip including data that is only present in the head block, and which has not yet been compacted to
|
||||||
|
disk.
|
||||||
|
|
||||||
|
```plaintext
|
||||||
|
POST /api/v1/admin/tsdb/snapshot
|
||||||
|
PUT /api/v1/admin/tsdb/snapshot
|
||||||
|
```
|
||||||
|
|
||||||
|
URL query parameters:
|
||||||
|
|
||||||
|
- `skip_head=<bool>`: skip data present in the head block. Optional.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ curl -X 'POST' 'http://localhost:9090/api/v1/admin/tsdb/snapshot'
|
||||||
|
{
|
||||||
|
"status": "success",
|
||||||
|
"data": {
|
||||||
|
"name": "20171210T211224Z-2be650b6d019eb54"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The snapshot now exists at `<data-dir>/snapshots/20171210T211224Z-2be650b6d019eb54`
|
||||||
|
|
||||||
## Further readings
|
## Further readings
|
||||||
|
|
||||||
- [Website]
|
- [Website]
|
||||||
- [Github]
|
- [Github]
|
||||||
|
- [Documentation]
|
||||||
|
- [Helm chart]
|
||||||
- [`docker/monitoring`][docker/monitoring]
|
- [`docker/monitoring`][docker/monitoring]
|
||||||
- [Node exporter]
|
- [Node exporter]
|
||||||
- [SNMP exporter]
|
- [SNMP exporter]
|
||||||
@@ -148,6 +368,10 @@ All the references in the [further readings] section, plus the following:
|
|||||||
- [Scrape selective metrics in Prometheus]
|
- [Scrape selective metrics in Prometheus]
|
||||||
- [Dropping metrics at scrape time with Prometheus]
|
- [Dropping metrics at scrape time with Prometheus]
|
||||||
- [How relabeling in Prometheus works]
|
- [How relabeling in Prometheus works]
|
||||||
|
- [Install Prometheus and Grafana with helm 3 on a local machine VM]
|
||||||
|
- [Set up prometheus and ingress on kubernetes]
|
||||||
|
- [How to integrate Prometheus and Grafana on Kubernetes using Helm]
|
||||||
|
- [node-exporter's helm chart's values]
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
Reference
|
Reference
|
||||||
@@ -166,20 +390,27 @@ All the references in the [further readings] section, plus the following:
|
|||||||
[docker/monitoring]: ../containers/monitoring/README.md
|
[docker/monitoring]: ../containers/monitoring/README.md
|
||||||
|
|
||||||
<!-- Upstream -->
|
<!-- Upstream -->
|
||||||
|
[documentation]: https://prometheus.io/docs/
|
||||||
[functions]: https://prometheus.io/docs/prometheus/latest/querying/functions/
|
[functions]: https://prometheus.io/docs/prometheus/latest/querying/functions/
|
||||||
[github]: https://github.com/prometheus/prometheus
|
[github]: https://github.com/prometheus/prometheus
|
||||||
|
[helm chart]: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus
|
||||||
|
[metric_relabel_configs]: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs
|
||||||
[node exporter guide]: https://prometheus.io/docs/guides/node-exporter/
|
[node exporter guide]: https://prometheus.io/docs/guides/node-exporter/
|
||||||
|
[node-exporter's helm chart's values]: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter
|
||||||
[prometheus/node_exporter]: https://github.com/prometheus/node_exporter
|
[prometheus/node_exporter]: https://github.com/prometheus/node_exporter
|
||||||
[prometheus/snmp_exporter]: https://github.com/prometheus/snmp_exporter
|
[prometheus/snmp_exporter]: https://github.com/prometheus/snmp_exporter
|
||||||
[promql]: https://prometheus.io/docs/prometheus/latest/querying/basics/
|
[promql]: https://prometheus.io/docs/prometheus/latest/querying/basics/
|
||||||
|
[storage]: https://prometheus.io/docs/prometheus/latest/storage/
|
||||||
[website]: https://prometheus.io/
|
[website]: https://prometheus.io/
|
||||||
[metric_relabel_configs]: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs
|
|
||||||
|
|
||||||
<!-- Others -->
|
<!-- Others -->
|
||||||
[dropping metrics at scrape time with prometheus]: https://www.robustperception.io/dropping-metrics-at-scrape-time-with-prometheus/
|
[dropping metrics at scrape time with prometheus]: https://www.robustperception.io/dropping-metrics-at-scrape-time-with-prometheus/
|
||||||
[getting started with prometheus]: https://opensource.com/article/18/12/introduction-prometheus
|
[getting started with prometheus]: https://opensource.com/article/18/12/introduction-prometheus
|
||||||
[how i monitor my openwrt router with grafana cloud and prometheus]: https://grafana.com/blog/2021/02/09/how-i-monitor-my-openwrt-router-with-grafana-cloud-and-prometheus/
|
[how i monitor my openwrt router with grafana cloud and prometheus]: https://grafana.com/blog/2021/02/09/how-i-monitor-my-openwrt-router-with-grafana-cloud-and-prometheus/
|
||||||
|
[how relabeling in prometheus works]: https://grafana.com/blog/2022/03/21/how-relabeling-in-prometheus-works/
|
||||||
|
[how to integrate prometheus and grafana on kubernetes using helm]: https://semaphoreci.com/blog/prometheus-grafana-kubernetes-helm
|
||||||
|
[install prometheus and grafana with helm 3 on a local machine vm]: https://dev.to/ko_kamlesh/install-prometheus-grafana-with-helm-3-on-local-machine-vm-1kgj
|
||||||
[ordaa/boinc_exporter]: https://gitlab.com/ordaa/boinc_exporter
|
[ordaa/boinc_exporter]: https://gitlab.com/ordaa/boinc_exporter
|
||||||
[scrape selective metrics in prometheus]: https://docs.last9.io/docs/how-to-scrape-only-selective-metrics-in-prometheus
|
[scrape selective metrics in prometheus]: https://docs.last9.io/docs/how-to-scrape-only-selective-metrics-in-prometheus
|
||||||
|
[set up prometheus and ingress on kubernetes]: https://blog.gojekengineering.com/diy-how-to-set-up-prometheus-and-ingress-on-kubernetes-d395248e2ba
|
||||||
[snmp monitoring and easing it with prometheus]: https://medium.com/@openmohan/snmp-monitoring-and-easing-it-with-prometheus-b157c0a42c0c
|
[snmp monitoring and easing it with prometheus]: https://medium.com/@openmohan/snmp-monitoring-and-easing-it-with-prometheus-b157c0a42c0c
|
||||||
[how relabeling in prometheus works]: https://grafana.com/blog/2022/03/21/how-relabeling-in-prometheus-works/
|
|
||||||
|
|||||||
@@ -10,8 +10,8 @@ helm repo update 'keda'
|
|||||||
helm search hub --max-col-width '100' 'ingress-nginx'
|
helm search hub --max-col-width '100' 'ingress-nginx'
|
||||||
helm search repo --versions 'gitlab/gitlab-runner'
|
helm search repo --versions 'gitlab/gitlab-runner'
|
||||||
|
|
||||||
helm inspect values 'gitlab/gitlab'
|
helm show values 'gitlab/gitlab'
|
||||||
helm inspect values 'gitlab/gitlab-runner' --version '0.64.1'
|
helm show values 'gitlab/gitlab-runner' --version '0.64.1'
|
||||||
|
|
||||||
helm pull 'ingress-nginx/ingress-nginx' --version '4.0.6' --destination '/tmp' --untar --untardir 'ingress-nginx'
|
helm pull 'ingress-nginx/ingress-nginx' --version '4.0.6' --destination '/tmp' --untar --untardir 'ingress-nginx'
|
||||||
|
|
||||||
@@ -24,4 +24,6 @@ helm upgrade --install 'keda' 'keda' --repo 'https://kedacore.github.io/charts'
|
|||||||
|
|
||||||
helm get manifest 'wordpress'
|
helm get manifest 'wordpress'
|
||||||
|
|
||||||
|
helm -n 'monitoring' delete 'grafana'
|
||||||
|
|
||||||
helm plugin list
|
helm plugin list
|
||||||
|
|||||||
@@ -7,3 +7,5 @@ kubectl apply --namespace 'gitlab' --values 'secrets.yaml'
|
|||||||
# Requires the metrics server to be running in the cluster
|
# Requires the metrics server to be running in the cluster
|
||||||
kubectl top nodes
|
kubectl top nodes
|
||||||
kubectl top pods
|
kubectl top pods
|
||||||
|
|
||||||
|
kubectl get events -n 'monitoring' --sort-by '.metadata.creationTimestamp'
|
||||||
|
|||||||
Reference in New Issue
Block a user