From 22f433553504f7b1f1d85b9db5537858fa08fcd1 Mon Sep 17 00:00:00 2001 From: Michele Cereda Date: Mon, 29 Jul 2024 19:00:05 +0200 Subject: [PATCH] feat(aws/eks): make ebs volumes work --- knowledge base/cloud computing/aws/eks.md | 516 +++++++++++++------- knowledge base/kubernetes/README.md | 25 +- knowledge base/kubernetes/metrics server.md | 54 ++ snippets/aws/commands.fish | 19 +- 4 files changed, 412 insertions(+), 202 deletions(-) create mode 100644 knowledge base/kubernetes/metrics server.md diff --git a/knowledge base/cloud computing/aws/eks.md b/knowledge base/cloud computing/aws/eks.md index 70b214a..a70bd40 100644 --- a/knowledge base/cloud computing/aws/eks.md +++ b/knowledge base/cloud computing/aws/eks.md @@ -10,10 +10,12 @@ 1. [Secrets encryption through KMS](#secrets-encryption-through-kms) 1. [Storage](#storage) 1. [Use EBS as volumes](#use-ebs-as-volumes) - 1. [EBS CSI driver IAM role](#ebs-csi-driver-iam-role) + 1. [EBS CSI driver IAM role as aws-managed add-on](#ebs-csi-driver-iam-role-as-aws-managed-add-on) + 1. [EBS CSI driver IAM role as self-managed add-on](#ebs-csi-driver-iam-role-as-self-managed-add-on) +1. [Metrics server](#metrics-server) 1. [Pod identity](#pod-identity) 1. [Autoscaling](#autoscaling) - 1. [Cluster autoscaler](#cluster-autoscaler) + 1. [Cluster autoscaler](#cluster-autoscaler) 1. [Troubleshooting](#troubleshooting) 1. [Identify common issues](#identify-common-issues) 1. [The worker nodes fail to join the cluster](#the-worker-nodes-fail-to-join-the-cluster) @@ -55,8 +57,7 @@ Other IAM principals _can_ have access to the cluster's API once [they are added ```sh # Create clusters. -aws eks create-cluster \ - --name 'DeepThought' \ +aws eks create-cluster --name 'DeepThought' \ --role-arn 'arn:aws:iam::000011112222:role/aws-service-role/eks.amazonaws.com/AWSServiceRoleForAmazonEKS' \ --resources-vpc-config 'subnetIds=subnet-11112222333344445,subnet-66667777888899990' aws eks create-cluster … --access-config 'authenticationMode=API' @@ -65,8 +66,7 @@ aws eks create-cluster … --access-config 'authenticationMode=API' aws eks describe-cluster --name 'DeepThought' --query 'cluster.accessConfig.authenticationMode' --output 'text' # Change encryption configuration. -aws eks associate-encryption-config \ - --cluster-name 'DeepThought' \ +aws eks associate-encryption-config --cluster-name 'DeepThought' \ --encryption-config '[{ "provider": { "keyArn": "arn:aws:kms:eu-west-1:000011112222:key/33334444-5555-6666-7777-88889999aaaa" }, "resources": [ "secrets" ] @@ -89,20 +89,18 @@ aws eks associate-access-policy --cluster-name 'DeepThought' \ # Connect to clusters. aws eks update-kubeconfig --name 'DeepThought' && kubectl cluster-info -aws eks --region 'eu-west-1' update-kubeconfig --name 'oneForAll' --profile 'dev-user' && kubectl cluster-info +aws eks --region 'eu-west-1' update-kubeconfig --name 'DeepThought' --profile 'dev-user' && kubectl cluster-info # Create EC2 node groups. -aws eks create-nodegroup \ - --cluster-name 'DeepThought' \ +aws eks create-nodegroup --cluster-name 'DeepThought' \ --nodegroup-name 'alpha' \ --scaling-config 'minSize=1,maxSize=3,desiredSize=1' \ --node-role-arn 'arn:aws:iam::000011112222:role/DeepThoughtNodeGroupsServiceRole' \ --subnets 'subnet-11112222333344445' 'subnet-66667777888899990' # Create Fargate profiles. 
-aws eks create-fargate-profile \ - --cluster-name 'DeepThought' \ +aws eks create-fargate-profile --cluster-name 'DeepThought' \ --fargate-profile-name 'alpha' \ --pod-execution-role-arn 'arn:aws:iam::000011112222:role/DeepThoughtFargateServiceRole' \ --subnets 'subnet-11112222333344445' 'subnet-66667777888899990' \ @@ -111,6 +109,12 @@ aws eks create-fargate-profile \ # Get addon names. aws eks describe-addon-versions --query 'addons[].addonName' + +# Get addon versions. +aws eks describe-addon-versions --addon-name 'eks-pod-identity-agent' --query 'addons[].addonVersions[]' + +# Get addon configuration options. +aws eks describe-addon-configuration --addon-name 'aws-ebs-csi-driver' --addon-version 'v1.32.0-eksbuild.1' ``` @@ -231,15 +235,17 @@ This is what worked for me: }], }); - const cluster_service_role = new aws.iam.Role("cluster-service-role", { - assumeRolePolicy: cluster_assumeRole_policy, - managedPolicyArns: [ - // alternatively, use RolePolicyAttachments - "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy", - ], - name: "DeepThoughtClusterServiceRole", - … - }); + const cluster_service_role = new aws.iam.Role( + "cluster-service-role", + { + assumeRolePolicy: cluster_assumeRole_policy, + managedPolicyArns: [ + // alternatively, use RolePolicyAttachments + "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy", + ], + name: "DeepThoughtClusterServiceRole", + }, + ); ``` @@ -262,17 +268,19 @@ This is what worked for me: Pulumi ```ts - const cluster = new aws.eks.Cluster("cluster", { - name: "DeepThought", - roleArn: cluster_service_role.arn, - vpcConfig: { - subnetIds: [ - "subnet-11112222333344445", - "subnet-66667777888899990", - ], + const cluster = new aws.eks.Cluster( + "cluster", + { + name: "DeepThought", + roleArn: cluster_service_role.arn, + vpcConfig: { + subnetIds: [ + "subnet-11112222333344445", + "subnet-66667777888899990", + ], + }, }, - … - }); + ); ``` @@ -444,17 +452,19 @@ Procedure: }], }); - const nodeGroups_service_role = new aws.iam.Role("nodeGroups-service-role", { - assumeRolePolicy: nodeGroups_assumeRole_policy, - managedPolicyArns: [ - // alternatively, use RolePolicyAttachments - "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly", - "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy", - "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy", - ], - name: "DeepThoughtNodeGroupsServiceRole", - … - }); + const nodeGroups_service_role = new aws.iam.Role( + "nodeGroups-service-role", + { + assumeRolePolicy: nodeGroups_assumeRole_policy, + managedPolicyArns: [ + // alternatively, use RolePolicyAttachments + "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly", + "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy", + "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy", + ], + name: "DeepThoughtNodeGroupsServiceRole", + }, + ); ``` @@ -465,8 +475,7 @@ Procedure: CLI ```sh - aws eks create-nodegroup \ - --cluster-name 'DeepThought' \ + aws eks create-nodegroup --cluster-name 'DeepThought' \ --nodegroup-name 'alpha' \ --scaling-config 'minSize=1,maxSize=3,desiredSize=1' \ --node-role-arn 'arn:aws:iam::000011112222:role/DeepThoughtNodeGroupsServiceRole' \ @@ -479,18 +488,20 @@ Procedure: Pulumi ```ts - const nodeGroup_alpha = new aws.eks.NodeGroup("nodeGroup-alpha", { - nodeGroupName: "nodeGroup-alpha", - clusterName: cluster.name, - nodeRoleArn: nodeGroups_service_role.arn, - scalingConfig: { - minSize: 1, - maxSize: 3, - desiredSize: 1, + const nodeGroup_alpha = new aws.eks.NodeGroup( + "nodeGroup-alpha", + { + nodeGroupName: "nodeGroup-alpha", + clusterName: 
cluster.name, + nodeRoleArn: nodeGroups_service_role.arn, + scalingConfig: { + minSize: 1, + maxSize: 3, + desiredSize: 1, + }, + subnetIds: cluster.vpcConfig.subnetIds, }, - subnetIds: cluster.vpcConfig.subnetIds, - … - }); + ); ``` @@ -560,34 +571,38 @@ Procedure: ```ts const fargate_assumeRole_policy = pulumi.all([ - aws.getRegionOutput().apply(region => region.id), - aws.getCallerIdentityOutput().apply(callerIdentity => callerIdentity.accountId), - cluster.name, - ]).apply(([regionId, awsAccountId, clusterName]) => JSON.stringify({ - Version: "2012-10-17", - Statement: [{ - Effect: "Allow", - Action: "sts:AssumeRole", - Principal: { - Service: "eks-fargate-pods.amazonaws.com", - }, - Condition: { - ArnLike: { - "aws:SourceArn": `arn:aws:eks:${regionId}:${awsAccountId}:fargateprofile/${clusterName}/*` - } - }, - }], - })); + aws.getRegionOutput().apply(region => region.id), + aws.getCallerIdentityOutput().apply(callerIdentity => callerIdentity.accountId), + cluster.name, + ]).apply( + ([regionId, awsAccountId, clusterName]) => JSON.stringify({ + Version: "2012-10-17", + Statement: [{ + Effect: "Allow", + Action: "sts:AssumeRole", + Principal: { + Service: "eks-fargate-pods.amazonaws.com", + }, + Condition: { + ArnLike: { + "aws:SourceArn": `arn:aws:eks:${regionId}:${awsAccountId}:fargateprofile/${clusterName}/*`, + }, + }, + }], + }), + ); - const fargate_service_role = new aws.iam.Role("fargate-service-role", { - assumeRolePolicy: fargate_assumeRole_policy, - managedPolicyArns: [ - // alternatively, use RolePolicyAttachments - "arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy", - ], - name: "DeepThoughtFargateServiceRole", - … - }); + const fargate_service_role = new aws.iam.Role( + "fargate-service-role", + { + assumeRolePolicy: fargate_assumeRole_policy, + managedPolicyArns: [ + // alternatively, use RolePolicyAttachments + "arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy", + ], + name: "DeepThoughtFargateServiceRole", + }, + ); ``` @@ -598,8 +613,7 @@ Procedure: CLI ```sh - aws eks create-fargate-profile \ - --cluster-name 'DeepThought' \ + aws eks create-fargate-profile --cluster-name 'DeepThought' \ --fargate-profile-name 'alpha' \ --pod-execution-role-arn 'arn:aws:iam::000011112222:role/DeepThoughtFargateServiceRole' \ --subnets 'subnet-11112222333344445' 'subnet-66667777888899990' \ @@ -612,17 +626,19 @@ Procedure: Pulumi ```ts - const fargateProfile_alpha = new aws.eks.FargateProfile("fargateProfile-alpha", { - fargateProfileName: "fargateProfile-alpha", - clusterName: cluster.name, - podExecutionRoleArn: fargate_service_role.arn, - selectors: [ - { namespace: "monitoring" }, - { namespace: "default" }, - ], - subnetIds: cluster.vpcConfig.subnetIds, - … - }); + const fargateProfile_alpha = new aws.eks.FargateProfile( + "fargateProfile-alpha", + { + fargateProfileName: "fargateProfile-alpha", + clusterName: cluster.name, + podExecutionRoleArn: fargate_service_role.arn, + selectors: [ + { namespace: "monitoring" }, + { namespace: "default" }, + ], + subnetIds: cluster.vpcConfig.subnetIds, + }, + ); ``` @@ -641,8 +657,7 @@ TL;DR: CLI ```sh - aws eks associate-encryption-config \ - --cluster-name 'DeepThought' \ + aws eks associate-encryption-config --cluster-name 'DeepThought' \ --encryption-config '[{ "provider": { "keyArn": "arn:aws:kms:eu-west-1:000011112222:key/33334444-5555-6666-7777-88889999aaaa" }, "resources": [ "secrets" ] @@ -655,13 +670,15 @@ TL;DR: Pulumi ```ts - const cluster = new aws.eks.Cluster("cluster", { - encryptionConfig: { - 
provider: { keyArn: `arn:aws:kms:${region}:${account}:key/${key_id}` }, - resources: [ "secrets" ], + new aws.eks.Cluster( + "cluster", + { + encryptionConfig: { + provider: { keyArn: `arn:aws:kms:${region}:${account}:key/${key_id}` }, + resources: [ "secrets" ], + }, }, - … - }); + ); ``` @@ -682,32 +699,136 @@ EBS CSI driver. Considerations: -- The EBS CSI driver needs make calls to AWS' APIs on your behalf.
- The worker nodes' IAM permissions need to be [set accordingly][ebs csi driver iam role]. +- The EBS CSI driver needs to make calls to AWS' APIs on one's behalf.
+  The permissions for the driver's IAM role need to be [set accordingly][ebs csi driver iam role as aws-managed add-on].
 - The EBS CSI DaemonSet is **required** to mount EBS volumes.
Fargate _can_ run the EBS _controller_ Pods, but it **cannot** run DaemonSets (including the CSI DaemonSet).
This means that Fargate **won't be able** to mount EBS volumes, and that only EC2 nodes running the DaemonSet will be able to do that. -- The EBS CSI driver is not installed on clusters by default.
+- The EBS CSI driver is **not** installed on clusters by default.
  Add it as an addon.
 - The _managed_ and _self-managed_ add-ons **cannot** be installed at the same time.
-- EKS does **not** automatically update the CSI Driver add-on when new versions are released, nor it does after clusters
-  are updated to new Kubernetes minor versions.
+- EKS does **not** automatically update the CSI Driver add-on when new versions are released, **nor** does it do so
+  after clusters are updated to new Kubernetes minor versions.
 
-#### EBS CSI driver IAM role
+Test the driver's installation:
+
+```sh
+# Refer https://docs.aws.amazon.com/eks/latest/userguide/ebs-sample-app.html
+git clone 'https://github.com/kubernetes-sigs/aws-ebs-csi-driver.git'
+cd 'aws-ebs-csi-driver/examples/kubernetes/dynamic-provisioning'
+echo -e "parameters:\n  type: gp3" >> 'manifests/storageclass.yaml'
+kubectl apply -f 'manifests/'
+kubectl describe storageClass 'ebs-sc'
+kubectl get pvc
+kubectl get pv
+kubectl exec -it 'app' -- cat '/data/out.txt'
+kubectl delete -f 'manifests/'
+```
+
+#### EBS CSI driver IAM role as aws-managed add-on
 
 Refer [Manage the Amazon EBS CSI driver as an Amazon EKS add-on].
 
 Requirements:
 
+- An existing EKS cluster (_duh!_).
+- An existing IAM OIDC provider for the cluster.
+
+  ```sh
+  # 1. Get the OIDC issuer ID for existing EKS clusters
+  OIDC_ISSUER="$(aws eks describe-cluster --name 'DeepThought' --query 'cluster.identity.oidc.issuer' --output 'text')"
+  OIDC_ID="$(echo "$OIDC_ISSUER" | awk -F '/id/' '{print $2}')"
+  # 2. Check they are present in the list of providers for the account
+  aws iam list-open-id-connect-providers --query 'OpenIDConnectProviderList' --output 'text' | grep "$OIDC_ID"
+  # 3. If the providers do not exist, create them
+  aws iam create-open-id-connect-provider --url "$OIDC_ISSUER" --client-id-list 'sts.amazonaws.com'
+  ```
+
+- An IAM role for the EBS CSI driver.
+ Refer [Create an Amazon EBS CSI driver IAM role]. + + If missing, the add-on **will** be installed but `kubectl describe pvc` will show the following errors: + + > ```plaintext + > failed to provision volume with StorageClass + > ``` + > + > ```plaintext + > could not create volume in EC2: UnauthorizedOperation error + > ``` + - [external-snapshotter], if planning to use the snapshot functionality of the driver.
-  Its components **must** to be installed **before** the driver add-on is installed on the cluster.
+  Its components **must** be installed **before** the driver add-on is installed on the cluster.
The components' installation **must** be performed in this order: 1. CustomResourceDefinitions (CRDs) for `volumeSnapshotClasses`, `volumeSnapshots` and `volumeSnapshotContents`. 1. ClusterRole, ClusterRoleBinding, and other RBAC components. 1. Snapshot controller's Deployment. + ```sh + git clone 'https://github.com/kubernetes-csi/external-snapshotter.git' + kubectl kustomize 'external-snapshotter/client/config/crd' | kubectl apply -f - + kubectl -n 'kube-system' kustomize 'external-snapshotter/deploy/kubernetes/snapshot-controller' | kubectl apply -f - + ``` + +Procedure: + +- Install the add-on.
+ Make sure to specify the IAM role for the EBS CSI driver from the requirements. + +
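+  If needed, the role's ARN can be looked up by name; a quick sketch, assuming the role is named 'EksEbsCsiDriverRole'
+  like in this repository's snippets:
+
+  ```sh
+  # Print the ARN of the role given its name
+  aws iam list-roles --query "Roles[?RoleName=='EksEbsCsiDriverRole'].Arn" --output 'text'
+  ```
+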
+  <details>
+    <summary>CLI</summary>
+
+  ```sh
+  aws eks create-addon --cluster-name 'DeepThought' \
+    --addon-name 'aws-ebs-csi-driver' \
+    --service-account-role-arn 'arn:aws:iam::012345678901:role/customEksEbsCsiDriverRole'
+  ```
+
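+  Optionally, check the add-on's state after creation; `describe-addon` should report it as `ACTIVE` once ready:
+
+  ```sh
+  # Returns e.g. "CREATING", then "ACTIVE"
+  aws eks describe-addon --cluster-name 'DeepThought' --addon-name 'aws-ebs-csi-driver' --query 'addon.status'
+  ```
+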
+ +
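+  The `addonVersion` pinned in the example below can be picked from the versions available for the add-on:
+
+  ```sh
+  # List the versions available for the add-on
+  aws eks describe-addon-versions --addon-name 'aws-ebs-csi-driver' --query 'addons[].addonVersions[].addonVersion'
+  ```
+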
+ Pulumi + + ```ts + new aws.eks.Addon( + "ebsCsiDriver", + { + clusterName: cluster.name, + addonName: "aws-ebs-csi-driver", + addonVersion: "v1.32.0-eksbuild.1", + resolveConflictsOnCreate: "OVERWRITE", + resolveConflictsOnUpdate: "OVERWRITE", + serviceAccountRoleArn: ebsCsiDriver_role.arn, + }, + ); + ``` + +
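+  The `OVERWRITE` values above tell EKS to overwrite configuration changed out of band when the add-on gets created or
+  updated.
+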
+ +#### EBS CSI driver IAM role as self-managed add-on + +```sh +helm upgrade -i --repo 'https://kubernetes-sigs.github.io/aws-ebs-csi-driver' \ + 'aws-ebs-csi-driver' 'aws-ebs-csi-driver' \ + --namespace 'kube-system' +``` + +## Metrics server + +Refer [View resource usage with the KubernetesMetrics Server]. + +Required by the Horizontal Pod Autoscaler and Dashboard components. + +Make sure to use a container port different from 10250 if using Fargate, as that port is reserved on Fargate: + +```sh +helm upgrade -i --repo 'https://kubernetes-sigs.github.io/metrics-server' \ + 'metrics-server' 'metrics-server' \ + --namespace 'kube-system' \ + --set 'containerPort'='10251' +``` + ## Pod identity Refer [Learn how EKS Pod Identity grants pods access to AWS services]. @@ -733,12 +854,12 @@ Requirements: ```json { - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Action": [ "eks-auth:AssumeRoleForPodIdentity" ], - "Resource": "*" - }] + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Action": [ "eks-auth:AssumeRoleForPodIdentity" ], + "Resource": "*" + }] } ``` @@ -767,12 +888,15 @@ Procedure: Pulumi ```ts - new aws.eks.Addon("pod-identity", { - clusterName: cluster.name, - addonName: "eks-pod-identity-agent", - resolveConflictsOnCreate: "OVERWRITE", - resolveConflictsOnUpdate: "OVERWRITE", - }); + new aws.eks.Addon( + "pod-identity", + { + clusterName: cluster.name, + addonName: "eks-pod-identity-agent", + resolveConflictsOnCreate: "OVERWRITE", + resolveConflictsOnUpdate: "OVERWRITE", + }, + ); ``` @@ -797,12 +921,15 @@ Procedure: Pulumi ```ts - new aws.eks.PodIdentityAssociation("customRole-to-defaultServiceAccount", { - clusterName: cluster.name, - roleArn: customRole.arn, - serviceAccount: "default", - namespace: "default", - }); + new aws.eks.PodIdentityAssociation( + "customRole-to-defaultServiceAccount", + { + clusterName: cluster.name, + roleArn: customRole.arn, + serviceAccount: "default", + namespace: "default", + }, + ); ``` @@ -832,63 +959,75 @@ Refer the autoscaler's Pulumi ```ts -const clusterAutoscaling_role = new aws.iam.Role("clusterAutoscaling", { - description: "Allows Pods to scale EKS cluster node groups on behalf of the user.", - assumeRolePolicy: JSON.stringify({ - Version: "2012-10-17", - Statement: [{ - Effect: "Allow", - Principal: { - Service: "pods.eks.amazonaws.com", - }, - Action: [ - "sts:AssumeRole", - "sts:TagSession", - ], - }], - }), -}); -const clusterAutoscaling_policy_scaleNodeGroups = new aws.iam.Policy("scaleNodeGroups", { - description: "Allows bearers to scale EKS node groups up and down.", - policy: JSON.stringify({ - Version: "2012-10-17", - Statement: [ - { - Effect: "Allow", - Action: [ - "autoscaling:DescribeAutoScalingGroups", - "autoscaling:DescribeAutoScalingInstances", - "autoscaling:DescribeLaunchConfigurations", - "autoscaling:DescribeScalingActivities", - "ec2:DescribeImages", - "ec2:DescribeInstanceTypes", - "ec2:DescribeLaunchTemplateVersions", - "ec2:GetInstanceTypesFromInstanceRequirements", - "eks:DescribeNodegroup", - ], - Resource: [ "*" ], - }, - { - Effect: "Allow", - Action: [ - "autoscaling:SetDesiredCapacity", - "autoscaling:TerminateInstanceInAutoScalingGroup", - ], - Resource: [ "*" ], - }, - ], - }), -}); -new aws.iam.RolePolicyAttachment("scaleNodeGroupsPolicy-to-clusterAutoscalingRole", { - policyArn: clusterAutoscaling_policy_scaleNodeGroups.arn, - role: clusterAutoscaling_role.name, -}); -new aws.eks.PodIdentityAssociation("clusterAutoscalingRole-to-clusterAutoscalerServiceAccount", { 
-  clusterName: cluster.name,
-  roleArn: clusterAutoscaling_role.arn,
-  serviceAccount: "cluster-autoscaler-aws",
-  namespace: "kube-system",
-});
+const clusterAutoscaling_role = new aws.iam.Role(
+  "clusterAutoscaling",
+  {
+    description: "Allows Pods to scale EKS cluster node groups on behalf of the user.",
+    assumeRolePolicy: JSON.stringify({
+      Version: "2012-10-17",
+      Statement: [{
+        Effect: "Allow",
+        Principal: {
+          Service: "pods.eks.amazonaws.com",
+        },
+        Action: [
+          "sts:AssumeRole",
+          "sts:TagSession",
+        ],
+      }],
+    }),
+  },
+);
+const clusterAutoscaling_policy_eksScaleNodeGroups = new aws.iam.Policy(
+  "eksScaleNodeGroups",
+  {
+    description: "Allows bearers to scale EKS node groups up and down.",
+    policy: JSON.stringify({
+      Version: "2012-10-17",
+      Statement: [
+        {
+          Effect: "Allow",
+          Action: [
+            "autoscaling:DescribeAutoScalingGroups",
+            "autoscaling:DescribeAutoScalingInstances",
+            "autoscaling:DescribeLaunchConfigurations",
+            "autoscaling:DescribeScalingActivities",
+            "ec2:DescribeImages",
+            "ec2:DescribeInstanceTypes",
+            "ec2:DescribeLaunchTemplateVersions",
+            "ec2:GetInstanceTypesFromInstanceRequirements",
+            "eks:DescribeNodegroup",
+          ],
+          Resource: [ "*" ],
+        },
+        {
+          Effect: "Allow",
+          Action: [
+            "autoscaling:SetDesiredCapacity",
+            "autoscaling:TerminateInstanceInAutoScalingGroup",
+          ],
+          Resource: [ "*" ],
+        },
+      ],
+    }),
+  },
+);
+new aws.iam.RolePolicyAttachment(
+  "scaleNodeGroupsPolicy-to-clusterAutoscalingRole",
+  {
+    policyArn: clusterAutoscaling_policy_eksScaleNodeGroups.arn,
+    role: clusterAutoscaling_role.name,
+  },
+);
+new aws.eks.PodIdentityAssociation(
+  "clusterAutoscalingRole-to-clusterAutoscalerServiceAccount",
+  {
+    clusterName: cluster.name,
+    roleArn: clusterAutoscaling_role.arn,
+    serviceAccount: "cluster-autoscaler-aws",
+    namespace: "kube-system",
+  },
+);
 ```
 
 </details>
@@ -898,8 +1037,9 @@ Install the cluster autoscaler component with the proper configuration:
 ```sh
 # Use a service account with podIdentityAssociation
 aws eks --region 'eu-west-1' update-kubeconfig --name 'DeepThought'
-helm --namespace 'kube-system' upgrade --install --repo 'https://kubernetes.github.io/autoscaler' \
+helm upgrade -i --repo 'https://kubernetes.github.io/autoscaler' \
   'cluster-autoscaler' 'cluster-autoscaler' \
+  --namespace 'kube-system' \
   --set 'cloudProvider'='aws' \
   --set 'awsRegion'='eu-west-1' \
   --set 'autoDiscovery.clusterName'='DeepThought' \
@@ -982,6 +1122,8 @@ Debug: see [Identify common issues].
 - [How do you get kubectl to log in to an AWS EKS cluster?]
 - [Learn how EKS Pod Identity grants pods access to AWS services]
 - [Configure instance permissions required for Systems Manager]
+- [View resource usage with the KubernetesMetrics Server]
+- [Create an Amazon EBS CSI driver IAM role]
 
 
 [amazon elastic block store (ebs) csi driver]: https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/master/README.md
diff --git a/knowledge base/kubernetes/README.md b/knowledge base/kubernetes/README.md
index 6fed72c..e5233fd 100644
--- a/knowledge base/kubernetes/README.md
+++ b/knowledge base/kubernetes/README.md
@@ -45,7 +45,6 @@ Hosted by the [Cloud Native Computing Foundation][cncf].
 1. [Run a command just before a Pod stops](#run-a-command-just-before-a-pod-stops)
 1. [Examples](#examples)
    1. [Create an admission webhook](#create-an-admission-webhook)
-   1. [Prometheus on Kubernetes using Helm](#prometheus-on-kubernetes-using-helm)
 1. [Further readings](#further-readings)
    1. [Sources](#sources)
@@ -362,7 +361,7 @@ Use cases:
   apiVersion: apps/v1
   kind: Pod
   volumes:
-    - name: my-emptydir
+    - name: my-empty-dir
       emptyDir:
         # Omit the 'medium' field to use disk storage.
         # The 'Memory' medium will create tmpfs to store data.
@@ -456,7 +455,7 @@ apiVersion: v1
 kind: Pod
 spec:
   containers:
-  - image: registry.k8s.io/test-webserver
+  - image: registry.k8s.io/test-web-server
     name: test-container
     volumeMounts:
     - mountPath: /my-nfs-data
@@ -488,7 +487,7 @@ metadata:
     zone: us-east-coast
 spec:
   volumes:
-    - name: my-downwardapi-volume
+    - name: my-downward-api-volume
       downwardAPI:
         defaultMode: 0644
         items:
@@ -551,7 +550,7 @@ Gotchas:
 1. Delete the STS **without killing its pods**:
 
    ```sh
-   kubectl delete statefulsets.apps 'my-sts' --cascade 'orphan'
+   kubectl delete statefulSets.apps 'my-sts' --cascade 'orphan'
    ```
 
 1. Redeploy the STS with the changed size.
@@ -615,15 +614,19 @@ Gotchas:
 
 Controllers are available to scale Pods or Nodes automatically, both in number or size.
 
-Automatic scaling of Pods is done in number by the HorizontalPodAutoscaler, and in size by the VerticalPodAutoscaler.
+Automatic scaling of Pods is done in number by the Horizontal Pod Autoscaler, and in size by the +Vertical Pod Autoscaler.
Automatic scaling of Nodes is done in number by the Cluster Autoscaler, and in size by add-ons like [Karpenter]. > Be aware of mix-and-matching autoscalers for the same kind of resource.
> One can easily defy the work done by the other and make that resource behave unexpectedly. -K8S only comes with the HorizontalPodAutoscaler by default.
+K8S only comes with the Horizontal Pod Autoscaler by default.
 Managed K8S usually also comes with the [Cluster Autoscaler] if autoscaling is enabled on the cluster resource.
 
+The Horizontal and Vertical Pod Autoscalers require access to metrics.
+This requires the [metrics server] addon to be installed and accessible. + ### Pod scaling Autoscaling of Pods by number requires the use of the Horizontal Pod Autoscaler.
@@ -954,10 +957,6 @@ you need: See the example's [README][create an admission webhook]. -### Prometheus on Kubernetes using Helm - -See the example's [README][prometheus on kubernetes using helm]. - ## Further readings Usage: @@ -1053,6 +1052,7 @@ Others: [azure kubernetes service]: ../cloud%20computing/azure/aks.md [cert-manager]: cert-manager.md +[cluster autoscaler]: cluster%20autoscaler.md [connection tracking]: ../connection%20tracking.placeholder [create an admission webhook]: ../../examples/kubernetes/create%20an%20admission%20webhook/README.md [etcd]: ../etcd.placeholder @@ -1069,9 +1069,9 @@ Others: [kubescape]: kubescape.md [kubeval]: kubeval.md [kustomize]: kustomize.md +[metrics server]: metrics%20server.md [minikube]: minikube.md [network policies]: network%20policies.md -[prometheus on kubernetes using helm]: ../../examples/kubernetes/prometheus%20on%20k8s%20using%20helm.md [rke2]: rke2.md [terraform]: ../terraform.md [velero]: velero.md @@ -1079,7 +1079,6 @@ Others: [addons]: https://kubernetes.io/docs/concepts/cluster-administration/addons/ [api deprecation policy]: https://kubernetes.io/docs/reference/using-api/deprecation-policy/ -[cluster autoscaler]: https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler [common labels]: https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/ [concepts]: https://kubernetes.io/docs/concepts/ [configuration best practices]: https://kubernetes.io/docs/concepts/configuration/overview/ diff --git a/knowledge base/kubernetes/metrics server.md b/knowledge base/kubernetes/metrics server.md new file mode 100644 index 0000000..5ecaaf8 --- /dev/null +++ b/knowledge base/kubernetes/metrics server.md @@ -0,0 +1,54 @@ +# Metrics server + +Cluster-wide addon component collecting and aggregating resource metrics pulled from each kubelet.
+Serves metrics through the Metrics API for use by the HPA, the VPA, and the `kubectl top` command.
+
+It is a reference implementation of the Metrics API.
+
+1. [TL;DR](#tldr)
+1. [Further readings](#further-readings)
+   1. [Sources](#sources)
+
+## TL;DR
+
+Fetches resource metrics from all kubelets, then exposes them in the API server.
+
+Uses the Kubernetes API to track nodes and pods, and queries each node over HTTP to fetch metrics.
+Builds an internal view of pod metadata, and keeps a cache of pod health.
+The cached pod information is available via the extension API that the metrics-server provides.
+
+Calls the kubelet API to collect metrics from each node.
+Depending on the metrics-server version, it uses:
+
+- The metrics resource endpoint `/metrics/resource` in version v0.6.0+, or
+- The summary API endpoint `/stats/summary` in older versions.
+
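+Assuming the server is up, the Metrics API can also be queried by hand to check the pipeline end-to-end:
+
+```sh
+# Raw Metrics API response for nodes
+kubectl get --raw '/apis/metrics.k8s.io/v1beta1/nodes'
+# Human-readable node metrics
+kubectl top node
+```
+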
+<details>
+  <summary>Setup</summary>
+
+```sh
+# As raw manifests
+kubectl apply -f 'https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml'
+
+# Alternatively, as a helm chart
+helm -n 'kube-system' upgrade --install --repo 'https://kubernetes-sigs.github.io/metrics-server' \
+  'metrics-server' 'metrics-server' --set 'containerPort'='10251'
+```
+
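+Then verify the rollout and that metrics are being served; `kubectl top` needs at least one scrape cycle to complete
+first:
+
+```sh
+# Wait for the Deployment to become available
+kubectl -n 'kube-system' rollout status deployment 'metrics-server'
+# Show metrics for all Pods once collected
+kubectl top pods --all-namespaces
+```
+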
+</details>
+
+## Further readings
+
+- [Website]
+- [Main repository]
+
+### Sources
+
+- [Helm chart]
+
+<!--
+  Reference
+  -->
+
+[helm chart]: https://artifacthub.io/packages/helm/metrics-server/metrics-server
+[main repository]: https://github.com/kubernetes-sigs/metrics-server
+[website]: https://kubernetes.io/docs/tasks/debug/debug-cluster/resource-metrics-pipeline/
diff --git a/snippets/aws/commands.fish b/snippets/aws/commands.fish
index cb7369c..b52ceb5 100644
--- a/snippets/aws/commands.fish
+++ b/snippets/aws/commands.fish
@@ -116,7 +116,22 @@ aws kms decrypt --ciphertext-blob 'fileb://ciphertext.dat' --query 'Plaintext' -
 
 aws eks --region 'eu-west-1' update-kubeconfig --name 'oneForAll' --profile 'dev-user'
 
+# Create OIDC providers for EKS clusters
+# 1. Get the OIDC issuer ID for existing EKS clusters
+set 'OIDC_ISSUER' (aws eks describe-cluster --name 'oneForAll' --query 'cluster.identity.oidc.issuer' --output 'text')
+set 'OIDC_ID' (echo "$OIDC_ISSUER" | awk -F '/id/' '{print $2}')
+# 2. Check they are present in the list of providers for the account
+aws iam list-open-id-connect-providers --query 'OpenIDConnectProviderList' --output 'text' | grep "$OIDC_ID"
+# 3. If the providers do not exist, create them
+aws iam create-open-id-connect-provider --url "$OIDC_ISSUER" --client-id-list 'sts.amazonaws.com'
+
+aws iam list-roles --query "Roles[?RoleName=='EksEbsCsiDriverRole'].Arn"
+aws iam list-attached-role-policies --role-name 'EksEbsCsiDriverRole' --query 'AttachedPolicies[].PolicyArn'
+aws iam get-policy --policy-arn 'arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy' --query 'Policy'
+
 aws eks describe-addon-versions --query 'sort(addons[].addonName)'
+aws eks describe-addon-versions --addon-name 'eks-pod-identity-agent' --query 'addons[].addonVersions[]'
+aws eks describe-addon-configuration --addon-name 'aws-ebs-csi-driver' --addon-version 'v1.32.0-eksbuild.1'
 
 docker run --rm -ti -v "$HOME/.aws:/root/.aws:ro" 'amazon/aws-cli:2.17.16' autoscaling describe-auto-scaling-groups
 
@@ -127,7 +142,5 @@ aws iam list-users --no-cli-pager --query 'Users[].UserName' --output 'text' \
 # Get the user owning a specific access key
 aws iam list-users --no-cli-pager --query 'Users[].UserName' --output 'text' \
   | xargs -n1 -P (nproc) aws iam list-access-keys \
-    --query "AccessKeyMetadata[?AccessKeyId=='AKIA2HKHF74L5H5PMM5W'].UserName" --output 'json' --user \
+    --query "AccessKeyMetadata[?AccessKeyId=='AKIA01234567890ABCDE'].UserName" --output 'json' --user \
   | jq -rs 'flatten|first'
-
-AKIA01234567890ABCDE