From eba2aaebeaaab62f690100bf564c19f925f0732e Mon Sep 17 00:00:00 2001 From: Michele Cereda Date: Mon, 16 Sep 2024 20:17:50 +0200 Subject: [PATCH] fix(k8s): set affinity examples for addons --- .markdownlint.yaml | 1 + knowledge base/cloud computing/aws/eks.md | 6 +-- .../kubernetes/cluster autoscaler.md | 52 ++++++++++++------- knowledge base/kubernetes/metrics server.md | 9 +++- snippets/helm.sh | 15 ++++++ 5 files changed, 60 insertions(+), 23 deletions(-) diff --git a/.markdownlint.yaml b/.markdownlint.yaml index dc7d9de..5264fb5 100644 --- a/.markdownlint.yaml +++ b/.markdownlint.yaml @@ -13,3 +13,4 @@ MD033: # no-inline-html - code - details - summary + - sup diff --git a/knowledge base/cloud computing/aws/eks.md b/knowledge base/cloud computing/aws/eks.md index 73e4521..c09bfe4 100644 --- a/knowledge base/cloud computing/aws/eks.md +++ b/knowledge base/cloud computing/aws/eks.md @@ -1126,10 +1126,8 @@ Installation: 'aws-load-balancer-controller' 'aws-load-balancer-controller' \ --namespace 'kube-system' \ --set 'clusterName'='DeepThought' \ - --set 'serviceAccount.create'='true' \ - --set 'serviceAccount.name'='aws-load-balancer-controller' \ - --set 'region'='eu-west-1' \ - --set 'vpcId'='vpc-01234567' + --set 'serviceAccount.create'='true' --set 'serviceAccount.name'='aws-load-balancer-controller' \ + --set 'region'='eu-west-1' --set 'vpcId'='vpc-01234567' ``` diff --git a/knowledge base/kubernetes/cluster autoscaler.md b/knowledge base/kubernetes/cluster autoscaler.md index f18d85b..70600df 100644 --- a/knowledge base/kubernetes/cluster autoscaler.md +++ b/knowledge base/kubernetes/cluster autoscaler.md @@ -1,18 +1,25 @@ # Cluster autoscaler -Automatically adjusts the number of nodes in Kubernetes clusters. +Automatically adjusts the number of nodes in Kubernetes clusters to meet their current needs. 1. [TL;DR](#tldr) +1. [Best practices](#best-practices) 1. [Further readings](#further-readings) 1. 
[Sources](#sources) ## TL;DR -Acts when one of the following conditions is true: +The purpose of Cluster Autoscaler is to get pending pods a place to run. -- Pods failed to run in the cluster due to insufficient resources. -- Nodes in the cluster have been underutilized for an extended period of time, and their pods can be placed on other - existing nodes. +The autoscaler acts when one of the following conditions is true: + +- Pods failed to run in the cluster due to insufficient resources.
+ This triggers a scale-**up** event, where it will try to **add** a new node. +- Nodes in the cluster have been consistently underutilized for a significant amount of time, and their pods can be + moved to other existing nodes.
+ This triggers a scale-**down** event, where it will try to **remove** an existing node. + +The time required for node provisioning depends on the cloud provider and other Kubernetes components.
Setup @@ -31,27 +38,36 @@ helm --namespace 'kube-system' uninstall 'cluster-autoscaler'
- -
Real world use cases ```sh -helm --namespace 'kube-system' upgrade --install 'cluster-autoscaler' 'autoscaler/cluster-autoscaler' \ - --set 'cloudProvider'='aws' --set 'awsRegion'='eu-west-1' \ - --set 'autoDiscovery.clusterName'='defaultCluster' --set 'rbac.serviceAccount.name'='cluster-autoscaler-aws' +aws eks --region 'eu-west-1' update-kubeconfig --name 'custom-eks-cluster' \ +&& helm --namespace 'kube-system' upgrade --install --repo 'https://kubernetes.github.io/autoscaler' \ + 'cluster-autoscaler' 'cluster-autoscaler' \ + --set 'cloudProvider'='aws' --set 'awsRegion'='eu-west-1' --set 'autoDiscovery.clusterName'='custom-eks-cluster' \ + --set 'rbac.serviceAccount.name'='cluster-autoscaler-aws' \ + --set 'replicaCount'='2' \ + --set 'resources.requests.cpu'='40m' --set 'resources.requests.memory'='50Mi' \ + --set 'resources.limits.cpu'='100m' --set 'resources.limits.memory'='300Mi' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].weight'='100' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.topologyKey'='kubernetes.io/hostname' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.labelSelector.matchExpressions[0].key'='app.kubernetes.io/name' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.labelSelector.matchExpressions[0].operator'='In' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.labelSelector.matchExpressions[0].values[0]'='aws-cluster-autoscaler' ```
+## Best practices + +- Do **not** modify nodes belonging to autoscaled node groups directly. +- All nodes within the same autoscaled node group should have the same capacity, labels and system pods running on them. +- Specify requests for all the pods one can. +- Should one need to prevent pods from being deleted too abruptly, consider using PodDisruptionBudgets. +- Check one's cloud provider's quota is big enough **before** specifying min/max settings for clusters' node pools. +- Do **not** run **any** additional node group autoscaler (**especially** those from one's own cloud provider). + ## Further readings - [Main repository] diff --git a/knowledge base/kubernetes/metrics server.md b/knowledge base/kubernetes/metrics server.md index 5ecaaf8..11bde25 100644 --- a/knowledge base/kubernetes/metrics server.md +++ b/knowledge base/kubernetes/metrics server.md @@ -29,7 +29,14 @@ Depending on the metrics-server version it uses: ```sh kubectl apply -f 'https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml' helm -n 'kube-system' upgrade --install --repo 'https://kubernetes-sigs.github.io/metrics-server' \ - 'metrics-server' 'metrics-server' --set 'containerPort'='10251' + 'metrics-server' 'metrics-server' \ + --set 'replicas'='2' --set 'addonResizer.enabled'='true' \ + --set 'containerPort'='10251' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].weight'='100' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.topologyKey'='kubernetes.io/hostname' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.labelSelector.matchExpressions[0].key'='app.kubernetes.io/name' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.labelSelector.matchExpressions[0].operator'='In' \ + --set 
'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.labelSelector.matchExpressions[0].values[0]'='metrics-server' ``` diff --git a/snippets/helm.sh b/snippets/helm.sh index 6934ad9..c7f7844 100644 --- a/snippets/helm.sh +++ b/snippets/helm.sh @@ -23,9 +23,24 @@ helm --namespace 'gitlab' upgrade --install --create-namespace --version '0.64.1 helm upgrade --install 'keda' 'keda' --repo 'https://kedacore.github.io/charts' --namespace 'keda' --create-namespace helm get manifest 'wordpress' +helm --namespace 'kube-system' get values 'metrics-server' helm -n 'monitoring' delete 'grafana' helm plugin list helm plugin install 'https://github.com/databus23/helm-diff' helm -n 'pocs' diff upgrade --repo 'https://dl.gitea.com/charts/' 'gitea' 'gitea' -f 'values.yaml' + +aws eks --region 'eu-west-1' update-kubeconfig --name 'custom-eks-cluster' \ +&& helm --namespace 'kube-system' upgrade --install --repo 'https://kubernetes.github.io/autoscaler' \ + 'cluster-autoscaler' 'cluster-autoscaler' \ + --set 'cloudProvider'='aws' --set 'awsRegion'='eu-west-1' --set 'autoDiscovery.clusterName'='custom-eks-cluster' \ + --set 'rbac.serviceAccount.name'='cluster-autoscaler-aws' \ + --set 'replicaCount'='2' \ + --set 'resources.requests.cpu'='40m' --set 'resources.requests.memory'='50Mi' \ + --set 'resources.limits.cpu'='100m' --set 'resources.limits.memory'='300Mi' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].weight'='100' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.topologyKey'='kubernetes.io/hostname' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.labelSelector.matchExpressions[0].key'='app.kubernetes.io/name' \ + --set 'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.labelSelector.matchExpressions[0].operator'='In' \ + --set
'affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].podAffinityTerm.labelSelector.matchExpressions[0].values[0]'='aws-cluster-autoscaler'