From 5cfaa9476c9edce43a398227d373830e8dd3cbbf Mon Sep 17 00:00:00 2001 From: Michele Cereda Date: Thu, 6 Jun 2024 00:12:56 +0200 Subject: [PATCH] chore: improve notes about prometheus and grafana --- .vscode/settings.json | 4 + .../kubernetes/grafana on k8s using helm.md | 78 ----- .../prometheus on k8s using helm.md | 72 ----- examples/kubernetes/values.gitlab-runner.yaml | 2 +- examples/kubernetes/values.prometheus.yaml | 6 + knowledge base/grafana.md | 102 ++++-- knowledge base/prometheus.md | 295 ++++++++++++++++-- snippets/helm.sh | 6 +- snippets/kubectl.sh | 2 + 9 files changed, 359 insertions(+), 208 deletions(-) delete mode 100644 examples/kubernetes/grafana on k8s using helm.md delete mode 100644 examples/kubernetes/prometheus on k8s using helm.md create mode 100644 examples/kubernetes/values.prometheus.yaml diff --git a/.vscode/settings.json b/.vscode/settings.json index c33894d..24de9e5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -120,6 +120,9 @@ "csma", "daemonless", "datagram", + "datasource", + "datasources", + "dblab", "deluser", "devel", "dhclient", @@ -230,6 +233,7 @@ "pinentry", "pipx", "pkexec", + "pkill", "polkit", "portsnap", "posix", diff --git a/examples/kubernetes/grafana on k8s using helm.md b/examples/kubernetes/grafana on k8s using helm.md deleted file mode 100644 index 643db77..0000000 --- a/examples/kubernetes/grafana on k8s using helm.md +++ /dev/null @@ -1,78 +0,0 @@ -# Grafana on Kubernetes using Helm - -## Table of contents - -1. [Usage](#usage) -1. [Gotchas](#gotchas) -1. [Further readings](#further-readings) - -## Usage - -Installation: - -1. add the repository: - - ```sh - helm repo add grafana https://grafana.github.io/helm-charts - helm repo update - ``` - -1. 
install the release: - - ```sh - helm upgrade --install --namespace monitoring --create-namespace grafana grafana/grafana - ``` - -Get the admin user's password: - -```sh -kubectl get secret --namespace monitoring grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo -``` - -The Grafana server can be accessed via port 80 on `grafana.monitoring.svc.cluster.local` from within the cluster.
-To get the external URL: - -```sh -export POD_NAME=$(kubectl get pods --namespace monitoring -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana" -o jsonpath="{.items[0].metadata.name}") -kubectl --namespace monitoring port-forward "${POD_NAME}" 3000 -``` - -Clean up: - -```sh -helm delete --namespace monitoring grafana -kubectl delete namespace --ignore-not-found monitoring -``` - -Access a Prometheus instance in the same namespace using `http://prometheus-server` - -Get the default values from the updated chart - -```sh -helm inspect values grafana/grafana > "$(git rev-parse --show-toplevel)/kubernetes/helm/grafana/values.yaml" -``` - -## Gotchas - -Useful dashboards: - -- `3662`: prometheus 2.0 overview -- `6417`: kubernetes cluster (prometheus) -- `9632`: nextcloud - -## Further readings - -- Official [helm chart] -- [Set up prometheus and ingress on kubernetes] -- [How to integrate Prometheus and Grafana on Kubernetes using Helm] - - - - -[helm chart]: https://github.com/grafana/helm-charts/tree/main/charts/grafana - - -[how to integrate prometheus and grafana on kubernetes using helm]: https://semaphoreci.com/blog/prometheus-grafana-kubernetes-helm -[set up prometheus and ingress on kubernetes]: https://blog.gojekengineering.com/diy-how-to-set-up-prometheus-and-ingress-on-kubernetes-d395248e2ba diff --git a/examples/kubernetes/prometheus on k8s using helm.md b/examples/kubernetes/prometheus on k8s using helm.md deleted file mode 100644 index da7f9ef..0000000 --- a/examples/kubernetes/prometheus on k8s using helm.md +++ /dev/null @@ -1,72 +0,0 @@ -# Prometheus on Kubernetes using Helm - -## Table of contents - -1. [Usage](#usage) -1. [Further readings](#further-readings) -1. 
[Sources](#sources) - -## Usage - -Installation: - -```sh -helm repo add \ - 'prometheus-community' 'https://prometheus-community.github.io/helm-charts' -helm upgrade --install \ - --namespace 'monitoring' --create-namespace \ - 'prometheus' 'prometheus-community/prometheus' -``` - -The server can be accessed via port 80 on `prometheus-server.monitoring.svc.cluster.local` from within the cluster. - -Get the server URL: - -```sh -export POD_NAME=$(kubectl get pods --namespace monitoring -l "app=prometheus,component=server" -o jsonpath="{.items[0].metadata.name}") -kubectl --namespace monitoring port-forward $POD_NAME 9090 -``` - -Alertmanager can be accessed via port 80 on `prometheus-alertmanager.monitoring.svc.cluster.local` from within the cluster - -Get Alertmanager's URL: - -```sh -export POD_NAME=$(kubectl get pods --namespace monitoring -l "app=prometheus,component=alertmanager" -o jsonpath="{.items[0].metadata.name}") -kubectl --namespace monitoring port-forward $POD_NAME 9093 -``` - -PushGateway can be accessed via port 9091 on `prometheus-pushgateway.monitoring.svc.cluster.local` from within the cluster - -Get PushGateway's URL: - -```sh -export POD_NAME=$(kubectl get pods --namespace monitoring -l "app=prometheus,component=pushgateway" -o jsonpath="{.items[0].metadata.name}") -``` - -## Further readings - -- [Helm chart] - -## Sources - -All the references in the [further readings] section, plus the following: - -- [Install Prometheus and Grafana with helm 3 on a local machine VM] -- [Set up prometheus and ingress on kubernetes] -- [How to integrate Prometheus and Grafana on Kubernetes using Helm] - - - - -[helm chart]: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus - - -[further readings]: #further-readings - - -[how to integrate prometheus and grafana on kubernetes using helm]: https://semaphoreci.com/blog/prometheus-grafana-kubernetes-helm -[install prometheus and grafana with helm 3 on a local machine vm]: 
https://dev.to/ko_kamlesh/install-prometheus-grafana-with-helm-3-on-local-machine-vm-1kgj -[set up prometheus and ingress on kubernetes]: https://blog.gojekengineering.com/diy-how-to-set-up-prometheus-and-ingress-on-kubernetes-d395248e2ba diff --git a/examples/kubernetes/values.gitlab-runner.yaml b/examples/kubernetes/values.gitlab-runner.yaml index 0b33f7c..752b9dd 100644 --- a/examples/kubernetes/values.gitlab-runner.yaml +++ b/examples/kubernetes/values.gitlab-runner.yaml @@ -2,7 +2,7 @@ ## # Source: -# `helm template --namespace 'gitlab' 'gitlab-runner' 'gitlab/gitlab-runner'` +# `helm show values --repo 'https://charts.gitlab.io' 'gitlab-runner'` ## useTini: false diff --git a/examples/kubernetes/values.prometheus.yaml b/examples/kubernetes/values.prometheus.yaml new file mode 100644 index 0000000..ac71257 --- /dev/null +++ b/examples/kubernetes/values.prometheus.yaml @@ -0,0 +1,6 @@ +--- + +## +# Source: +# `helm show values --repo 'https://prometheus-community.github.io/helm-charts' 'prometheus'` +## diff --git a/knowledge base/grafana.md b/knowledge base/grafana.md index 1ce1984..0ed25b5 100644 --- a/knowledge base/grafana.md +++ b/knowledge base/grafana.md @@ -2,9 +2,8 @@ Open-source platform for monitoring and observability. -## Table of contents - 1. [TL;DR](#tldr) +1. [Installation](#installation) 1. [Provisioning](#provisioning) 1. [Datasources](#datasources) 1. [Dashboards](#dashboards) @@ -28,18 +27,57 @@ curl -sS \ " ``` +## Installation + +
+ kubernetes + +```sh +helm repo add 'grafana' 'https://grafana.github.io/helm-charts' +helm -n 'monitoring' upgrade -i --create-namespace 'grafana' 'grafana/grafana' + +helm -n 'monitoring' upgrade -i --create-namespace --repo 'https://grafana.github.io/helm-charts' 'grafana' 'grafana' +``` + +Access components: + +| Component | From within the cluster | +| --------- | ----------------------------------------- | +| Server | `grafana.monitoring.svc.cluster.local:80` | + +```sh +# Access the server +kubectl -n 'monitoring' get secret 'grafana' -o jsonpath='{.data.admin-password}' | base64 --decode +kubectl -n 'monitoring' get pods -l 'app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana' \ + -o jsonpath='{.items[0].metadata.name}' \ +| xargs -I {} kubectl -n 'monitoring' port-forward {} 3000 +``` + +Clean up: + +```sh +helm -n 'monitoring' delete 'grafana' +kubectl delete namespace --ignore-not-found 'monitoring' +``` + +Access Prometheus instances in the same namespace using `http://prometheus-server` + +
+ ## Provisioning See [provision dashboards and data sources] for details. ### Datasources -Data sources can be managed automatically at provisioning by adding YAML configuration files in the `provisioning/datasources` directory. +Data sources can be managed automatically at provisioning by adding YAML configuration files in the +`provisioning/datasources` directory. Each configuration file can contain a list of `datasources` to add or update during startup.
If the data source already exists, Grafana reconfigures it to match the provisioned configuration file. -Grafana also deletes the data sources listed in `deleteDatasources` before adding or updating those in the `datasources` list. +Grafana also deletes the data sources listed in `deleteDatasources` before adding or updating those in the `datasources` +list. ```yml --- @@ -68,9 +106,11 @@ The easiest way to write datasources definitions in the configuration file is to 1. Login to Grafana as `admin` 1. Manually setup the datasource 1. Issue a `GET /api/datasources` request to Grafana's API to get the datasource configuration + ```sh curl -sS 'http://grafana:3000/api/datasources' -H 'Authorization: Basic YWRtaW46YWRtaW4=' ``` + 1. Edit it as YAML 1. Drop the YAML definition into the `provisioning/datasources` directory @@ -100,11 +140,15 @@ datasources: ### Dashboards -Dashboards can be automatically managed by adding one or more YAML config files in the `provisioning/dashboards` directory.
-Each config file can contain a list of dashboards `providers` that load dashboards into Grafana from the local filesystem. +Dashboards can be automatically managed by adding one or more YAML config files in the `provisioning/dashboards` +directory.
+Each config file can contain a list of dashboards `providers` that load dashboards into Grafana from the local +filesystem. -When Grafana starts, it will insert all dashboards available in the configured path, or update them if they are already present.
-Later on it will poll that path every `updateIntervalSeconds`, look for updated json files and update/insert those into the database. +When Grafana starts, it will insert all dashboards available in the configured path, or update them if they are already +present.
+Later on it will poll that path every `updateIntervalSeconds`, look for updated json files and update/insert those into +the database. ```yml apiVersion: 1 @@ -137,10 +181,13 @@ $ curl -sS \ ## Dashboards of interest -| Name | Grafana ID | URLs | -| ------------------ | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Node exporter full | 1860 | [grafana](https://grafana.com/grafana/dashboards/1860-node-exporter-full/), [github raw](https://raw.githubusercontent.com/rfmoz/grafana-dashboards/master/prometheus/node-exporter-full.json) | -| OpenWRT | 11147 | [grafana](https://grafana.com/grafana/dashboards/11147-openwrt/) | +| Name | Grafana ID | URLs | +| ------------------------------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Node exporter full | 1860 | [grafana](https://grafana.com/grafana/dashboards/1860-node-exporter-full/), [github raw](https://raw.githubusercontent.com/rfmoz/grafana-dashboards/master/prometheus/node-exporter-full.json) | +| OpenWRT | 11147 | [grafana](https://grafana.com/grafana/dashboards/11147-openwrt/) | +| prometheus 2.0 overview | 3662 | FIXME | +| kubernetes cluster (prometheus) | 6417 | FIXME | +| Nextcloud | 9632 | FIXME | ## Further readings @@ -148,7 +195,8 @@ $ curl -sS \ - [Github] - [HTTP API reference] - [Prometheus] -- [`docker/monitoring`][docker/monitoring] +- [`containers/monitoring`][containers/monitoring] +- Official [helm chart] ## Sources @@ -157,19 +205,14 @@ All the references in the [further readings] section, plus the following: - [Provisioning] - [Provision dashboards and data sources] - [Data source on startup] +- [Set up prometheus and ingress on kubernetes] +- [How to integrate Prometheus 
and Grafana on Kubernetes using Helm] - -[data source on startup]: https://community.grafana.com/t/data-source-on-startup/8618/2 -[github]: https://github.com/grafana/grafana -[http api reference]: https://grafana.com/docs/grafana/latest/developers/http_api/ -[provision dashboards and data sources]: https://grafana.com/tutorials/provision-dashboards-and-data-sources/ -[provisioning]: https://grafana.com/docs/grafana/latest/administration/provisioning/ -[website]: https://grafana.com - [datasources provisioning]: #datasources [further readings]: #further-readings @@ -178,4 +221,17 @@ All the references in the [further readings] section, plus the following: [prometheus]: prometheus.md -[docker/monitoring]: ../docker/monitoring/README.md +[containers/monitoring]: ../containers/monitoring/README.md + + +[data source on startup]: https://community.grafana.com/t/data-source-on-startup/8618/2 +[github]: https://github.com/grafana/grafana +[http api reference]: https://grafana.com/docs/grafana/latest/developers/http_api/ +[provision dashboards and data sources]: https://grafana.com/tutorials/provision-dashboards-and-data-sources/ +[provisioning]: https://grafana.com/docs/grafana/latest/administration/provisioning/ +[website]: https://grafana.com +[helm chart]: https://github.com/grafana/helm-charts/tree/main/charts/grafana + + +[how to integrate prometheus and grafana on kubernetes using helm]: https://semaphoreci.com/blog/prometheus-grafana-kubernetes-helm +[set up prometheus and ingress on kubernetes]: https://blog.gojekengineering.com/diy-how-to-set-up-prometheus-and-ingress-on-kubernetes-d395248e2ba diff --git a/knowledge base/prometheus.md b/knowledge base/prometheus.md index 616df90..eaa88ec 100644 --- a/knowledge base/prometheus.md +++ b/knowledge base/prometheus.md @@ -1,20 +1,43 @@ # Prometheus Monitoring and alerting system that collects metrics from configured targets at given intervals, evaluates rule -expressions, displays the results, and can trigger alerts 
when specified conditions are observed.
-Metrics can also be pushed using plugins, in the event hosts are behind a firewall or prohibited from opening ports by +expressions, displays the results, and can trigger alerts when specified conditions are observed. + +Metrics can be pushed using plugins, in the event hosts are behind a firewall or prohibited from opening ports by security policy. -## Table of contents - +1. [TL;DR](#tldr) 1. [Components](#components) 1. [Extras](#extras) +1. [Installation](#installation) 1. [Configuration](#configuration) + 1. [Filter metrics](#filter-metrics) 1. [Queries](#queries) -1. [Filter metrics](#filter-metrics) +1. [Storage](#storage) + 1. [Local storage](#local-storage) + 1. [External storage](#external-storage) + 1. [Backfilling](#backfilling) +1. [Management API](#management-api) + 1. [Take snapshots of the data](#take-snapshots-of-the-data) 1. [Further readings](#further-readings) 1. [Sources](#sources) +## TL;DR + +```sh +# Start the process. +prometheus +prometheus --web.enable-admin-api + +# Reload the configuration file without restarting the process. +kill -s 'SIGHUP' '3969' +pkill --signal 'HUP' 'prometheus' + +# Shut down the process *gracefully*. +kill -s 'SIGTERM' '3969' +pkill --signal 'TERM' 'prometheus' +``` + ## Components Prometheus is composed by its **server**, the **Alertmanager** and its **exporters**. @@ -33,10 +56,62 @@ Prometheus without the need of an agent. As welcomed addition, [Grafana] can be configured to use Prometheus as a backend of its in order to provide data visualization and dashboarding functions on the data it provides. +## Installation + +```sh +brew install 'prometheus' +docker run -p '9090:9090' -v './prometheus.yml:/etc/prometheus/prometheus.yml' --name prometheus -d 'prom/prometheus' +``` + +
+  <summary>Kubernetes</summary>
+
+```sh
+helm repo add 'prometheus-community' 'https://prometheus-community.github.io/helm-charts'
+helm -n 'monitoring' upgrade -i --create-namespace 'prometheus' 'prometheus-community/prometheus'
+
+helm -n 'monitoring' upgrade -i --create-namespace --repo 'https://prometheus-community.github.io/helm-charts' \
+  'prometheus' 'prometheus'
+```
+
+Access components:
+
+| Component         | From within the cluster                                   |
+| ----------------- | --------------------------------------------------------- |
+| Prometheus server | `prometheus-server.monitoring.svc.cluster.local:80`       |
+| Alertmanager      | `prometheus-alertmanager.monitoring.svc.cluster.local:80` |
+| Push gateway      | `prometheus-pushgateway.monitoring.svc.cluster.local:80`  |
+
+```sh
+# Access the prometheus server.
+kubectl -n 'monitoring' get pods -l 'app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prometheus' \
+  -o jsonpath='{.items[0].metadata.name}' \
+| xargs -I {} kubectl -n 'monitoring' port-forward {} 9090
+
+# Access alertmanager.
+kubectl -n 'monitoring' get pods -l 'app.kubernetes.io/name=alertmanager,app.kubernetes.io/instance=prometheus' \
+  -o jsonpath='{.items[0].metadata.name}' \
+| xargs -I {} kubectl -n 'monitoring' port-forward {} 9093
+
+# Access the push gateway.
+kubectl -n 'monitoring' get pods -l 'app=prometheus-pushgateway,component=pushgateway' \
+  -o jsonpath='{.items[0].metadata.name}' \
+| xargs -I {} kubectl -n 'monitoring' port-forward {} 9091
+```
+
+ ## Configuration The default configuration file is at `/etc/prometheus/prometheus.yml`. +Reload the configuration without restarting Prometheus's process by using the `SIGHUP` signal: + +```sh +kill -s 'SIGHUP' '3969' +pkill --signal 'HUP' 'prometheus' +``` + ```yml global: scrape_interval: 15s @@ -58,6 +133,36 @@ scrape_configs: regex: '(node_cpu)' ``` +### Filter metrics + +Refer [How relabeling in Prometheus works], [Scrape selective metrics in Prometheus] and +[Dropping metrics at scrape time with Prometheus]. + +Use [metric relabeling configurations][metric_relabel_configs] to select which series to ingest **after** scraping: + +```diff + scrape_configs: + - job_name: router + … ++ metric_relabel_configs: ++ - # do *not* record metrics which name matches the regex ++ # in this case, those which name starts with 'node_disk_' ++ source_labels: [ __name__ ] ++ action: drop ++ regex: node_disk_.* + - job_name: hosts + … ++ metric_relabel_configs: ++ - # *only* record metrics which name matches the regex ++ # in this case, those which name starts with 'node_cpu_' with cpu=1 and mode=user ++ source_labels: ++ - __name__ ++ - cpu ++ - mode ++ regex: node_cpu_.*1.*user.* ++ action: keep +``` + ## Queries Prometheus' query syntax is [PromQL]. @@ -95,40 +200,155 @@ calculates the **per-second rate of change** based on the last two data points o To calculate the overall CPU usage, the idle mode of the metric is used. Since idle percent of a processor is the opposite of a busy processor, the irate value is subtracted from 1. To make it a percentage, it is multiplied by 100. -## Filter metrics +## Storage -Refer [How relabeling in Prometheus works], [Scrape selective metrics in Prometheus] and -[Dropping metrics at scrape time with Prometheus]. +Refer [Storage]. 
-Use [metric relabeling configurations][metric_relabel_configs] to select which series to ingest **after** scraping: +Prometheus uses a local on-disk time series database, but can optionally integrate with remote storage systems. -```diff - scrape_configs: - - job_name: router - … -+ metric_relabel_configs: -+ - # do *not* record metrics which name matches the regex -+ # in this case, those which name starts with 'node_disk_' -+ source_labels: [ __name__ ] -+ action: drop -+ regex: node_disk_.* - - job_name: hosts - … -+ metric_relabel_configs: -+ - # *only* record metrics which name matches the regex -+ # in this case, those which name starts with 'node_cpu_' with cpu=1 and mode=user -+ source_labels: -+ - __name__ -+ - cpu -+ - mode -+ regex: node_cpu_.*1.*user.* -+ action: keep +### Local storage + +Local storage is **not** clustered **nor** replicated. This makes it not arbitrarily scalable or durable in the face of +outages.
+The use of RAID disks is suggested for storage availability, and snapshots are recommended for backups. + +> The local storage is **not** intended to be durable long-term storage and external solutions should be used to achieve +> extended retention and data durability. + +External storage may be used via the remote read/write APIs.
+These storage systems vary greatly in durability, performance, and efficiency. + +Ingested samples are grouped into blocks of two hours.
+Each two-hour block consists of a uniquely named directory. This contains:
+
+- A `chunks` subdirectory, hosting all the time series samples for that window of time.
+ Samples are grouped into one or more segment files of up to 512MB each by default. +- A metadata file. +- An index file.
+ This indexes metric names and labels to time series in the `chunks` directory. + +When series are deleted via the API, deletion records are stored in separate `tombstones` files and are **not** deleted +immediately from the chunk segments. + +The current block for incoming samples is kept in memory and is **not** fully persisted.
+This is secured against crashes by a write-ahead log (WAL) that can be replayed when the Prometheus server restarts. + +Write-ahead log files are stored in the `wal` directory in segments of 128MB in size.
+These files contain raw data that has not yet been _compacted_.
+Prometheus will retain a minimum of three write-ahead log files. Servers may retain more than three WAL files in order +to keep at least two hours of raw data stored. + +The server's `data` directory looks something like follows: + +```sh +./data +├── 01BKGV7JBM69T2G1BGBGM6KB12 +│ └── meta.json +├── 01BKGTZQ1SYQJTR4PB43C8PD98 +│ ├── chunks +│ │ └── 000001 +│ ├── tombstones +│ ├── index +│ └── meta.json +├── 01BKGTZQ1HHWHV8FBJXW1Y3W0K +│ └── meta.json +├── 01BKGV7JC0RY8A6MACW02A2PJD +│ ├── chunks +│ │ └── 000001 +│ ├── tombstones +│ ├── index +│ └── meta.json +├── chunks_head +│ └── 000001 +└── wal + ├── 000000002 + └── checkpoint.00000001 + └── 00000000 ``` +The initial two-hour blocks are eventually compacted into longer blocks in the background.
+Each block will contain data spanning up to 10% of the retention time or 31 days, whichever is smaller. + +The retention time defaults to 15 days.
+Expired block cleanup happens in the background. It may take up to two hours to remove expired blocks. Blocks must be +**fully** expired before they are removed. + +Prometheus stores an average of 1-2 bytes per sample.
+To plan the capacity of a Prometheus server, one can use the following rough formula: + +```plaintext +needed_disk_space = retention_time_seconds * ingested_samples_per_second * bytes_per_sample +``` + +To lower the rate of ingested samples one can: + +- Either reduce the number of time series scraped (fewer targets or fewer series per target) +- Or increase the scrape interval. + +Reducing the number of series is likely more effective, due to compression of samples within a series. + +If the local storage becomes corrupted for whatever reason, the best strategy is to shut down Prometheus and then remove +the entire storage directory. This means losing **all** the stored data.
+One can alternatively try removing individual block directories or the WAL directory to resolve the problem. Doing so
+means losing approximately two hours of data per block directory.
+
+> Prometheus does **not** support non-POSIX-compliant filesystems as local storage.
+> Unrecoverable corruptions may happen.
+> NFS filesystems (including AWS's EFS) are not supported as, though NFS could be POSIX-compliant, most of its +> implementations are not.
+> It is strongly recommended to use a local filesystem for reliability. + +If both time and size retention policies are specified, whichever triggers first will take precedence. + +### External storage + +TODO + +### Backfilling + +TODO + +## Management API + +### Take snapshots of the data + +> Requires the TSDB APIs to be enabled (`--web.enable-admin-api`). + +Use the `snapshot` API endpoint to create snapshots of all current data into `snapshots/-` under the +TSDB's data directory and return that directory as response. + +It will optionally skip including data that is only present in the head block, and which has not yet been compacted to +disk. + +```plaintext +POST /api/v1/admin/tsdb/snapshot +PUT /api/v1/admin/tsdb/snapshot +``` + +URL query parameters: + +- `skip_head`=: skip data present in the head block. Optional. + +Examples: + +```sh +$ curl -X 'POST' 'http://localhost:9090/api/v1/admin/tsdb/snapshot' +{ + "status": "success", + "data": { + "name": "20171210T211224Z-2be650b6d019eb54" + } +} +``` + +The snapshot now exists at `/snapshots/20171210T211224Z-2be650b6d019eb54` + ## Further readings - [Website] - [Github] +- [Documentation] +- [Helm chart] - [`docker/monitoring`][docker/monitoring] - [Node exporter] - [SNMP exporter] @@ -148,6 +368,10 @@ All the references in the [further readings] section, plus the following: - [Scrape selective metrics in Prometheus] - [Dropping metrics at scrape time with Prometheus] - [How relabeling in Prometheus works] +- [Install Prometheus and Grafana with helm 3 on a local machine VM] +- [Set up prometheus and ingress on kubernetes] +- [How to integrate Prometheus and Grafana on Kubernetes using Helm] +- [node-exporter's helm chart's values] +[documentation]: https://prometheus.io/docs/ [functions]: https://prometheus.io/docs/prometheus/latest/querying/functions/ [github]: https://github.com/prometheus/prometheus +[helm chart]: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus 
+[metric_relabel_configs]: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs [node exporter guide]: https://prometheus.io/docs/guides/node-exporter/ +[node-exporter's helm chart's values]: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter [prometheus/node_exporter]: https://github.com/prometheus/node_exporter [prometheus/snmp_exporter]: https://github.com/prometheus/snmp_exporter [promql]: https://prometheus.io/docs/prometheus/latest/querying/basics/ +[storage]: https://prometheus.io/docs/prometheus/latest/storage/ [website]: https://prometheus.io/ -[metric_relabel_configs]: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs [dropping metrics at scrape time with prometheus]: https://www.robustperception.io/dropping-metrics-at-scrape-time-with-prometheus/ [getting started with prometheus]: https://opensource.com/article/18/12/introduction-prometheus [how i monitor my openwrt router with grafana cloud and prometheus]: https://grafana.com/blog/2021/02/09/how-i-monitor-my-openwrt-router-with-grafana-cloud-and-prometheus/ +[how relabeling in prometheus works]: https://grafana.com/blog/2022/03/21/how-relabeling-in-prometheus-works/ +[how to integrate prometheus and grafana on kubernetes using helm]: https://semaphoreci.com/blog/prometheus-grafana-kubernetes-helm +[install prometheus and grafana with helm 3 on a local machine vm]: https://dev.to/ko_kamlesh/install-prometheus-grafana-with-helm-3-on-local-machine-vm-1kgj [ordaa/boinc_exporter]: https://gitlab.com/ordaa/boinc_exporter [scrape selective metrics in prometheus]: https://docs.last9.io/docs/how-to-scrape-only-selective-metrics-in-prometheus +[set up prometheus and ingress on kubernetes]: https://blog.gojekengineering.com/diy-how-to-set-up-prometheus-and-ingress-on-kubernetes-d395248e2ba [snmp monitoring and easing it with prometheus]: 
https://medium.com/@openmohan/snmp-monitoring-and-easing-it-with-prometheus-b157c0a42c0c -[how relabeling in prometheus works]: https://grafana.com/blog/2022/03/21/how-relabeling-in-prometheus-works/ diff --git a/snippets/helm.sh b/snippets/helm.sh index 39a0ffb..7cb04ed 100644 --- a/snippets/helm.sh +++ b/snippets/helm.sh @@ -10,8 +10,8 @@ helm repo update 'keda' helm search hub --max-col-width '100' 'ingress-nginx' helm search repo --versions 'gitlab/gitlab-runner' -helm inspect values 'gitlab/gitlab' -helm inspect values 'gitlab/gitlab-runner' --version '0.64.1' +helm show values 'gitlab/gitlab' +helm show values 'gitlab/gitlab-runner' --version '0.64.1' helm pull 'ingress-nginx/ingress-nginx' --version '4.0.6' --destination '/tmp' --untar --untardir 'ingress-nginx' @@ -24,4 +24,6 @@ helm upgrade --install 'keda' 'keda' --repo 'https://kedacore.github.io/charts' helm get manifest 'wordpress' +helm -n 'monitoring' delete 'grafana' + helm plugin list diff --git a/snippets/kubectl.sh b/snippets/kubectl.sh index 0b48497..769c49d 100644 --- a/snippets/kubectl.sh +++ b/snippets/kubectl.sh @@ -7,3 +7,5 @@ kubectl apply --namespace 'gitlab' --values 'secrets.yaml' # Requires the metrics server to be running in the cluster kubectl top nodes kubectl top pods + +kubectl get events -n 'monitoring' --sort-by '.metadata.creationTimestamp'