diff --git a/knowledge base/kubectl.md b/knowledge base/kubectl.md
index 84f07ed..bc7a8b3 100644
--- a/knowledge base/kubectl.md
+++ b/knowledge base/kubectl.md
@@ -23,21 +23,21 @@ kubectl get pods -l app=nginx,tier=frontend
### Table of contents
-- [TL;DR](#tldr)
-- [Configuration](#configuration)
- - [Configure access to multiple clusters](#configure-access-to-multiple-clusters)
-- [Create resources](#create-resources)
-- [Output formatting](#output-formatting)
-- [Verbosity and debugging](#verbosity-and-debugging)
-- [Further readings](#further-readings)
-- [Sources](#sources)
+1. [TL;DR](#tldr)
+1. [Configuration](#configuration)
+ 1. [Configure access to multiple clusters](#configure-access-to-multiple-clusters)
+1. [Create resources](#create-resources)
+1. [Output formatting](#output-formatting)
+1. [Verbosity and debugging](#verbosity-and-debugging)
+1. [Further readings](#further-readings)
+1. [Sources](#sources)
## TL;DR
```sh
# Enable shell completion.
-source <(kubectl completion bash)
-echo "[[ $commands[kubectl] ]] && source <(kubectl completion zsh)" >> ~/.zshrc
+source <(kubectl completion 'bash')
+echo "[[ $commands[kubectl] ]] && source <(kubectl completion 'zsh')" >> ~/.zshrc
# Shot the merged configuration.
kubectl config view
@@ -48,13 +48,13 @@ kubectl config view -o jsonpath='{.users[*].name}'
kubectl config view -o jsonpath='{.users[?(@.name == "e2e")].user.password}'
# Set configuration values.
-kubectl config set-context --current --namespace=keda
-kubectl config set-context gce --user=cluster-admin --namespace=foo
+kubectl config set-context --current --namespace='keda'
+kubectl config set-context 'gce' --user='cluster-admin' --namespace='foo'
kubectl config set-credentials \
- kubeuser/foo.kubernetes.com --username=kubeuser --password=kubepassword
+ 'kubeuser/foo.kubernetes.com' --username='kubeuser' --password='kubepassword'
# Delete configuration values.
-kubectl config unset users.foo
+kubectl config unset 'users.foo'
# Use multiple config files at once.
# This will temporarily merge them in one big configuration file.
@@ -65,47 +65,47 @@ kubectl config get-contexts
kubectl config current-context
# Set context as the default one.
-kubectl config use-context docker-desktop
-kubectl config use-context gce
+kubectl config use-context 'docker-desktop'
+kubectl config use-context 'gce'
# Display addresses of the master and services.
kubectl cluster-info
# Dump the complete current cluster state.
kubectl cluster-info dump
-kubectl cluster-info dump --output-directory=/path/to/cluster-state
+kubectl cluster-info dump --output-directory='/path/to/cluster-state'
# List supported resources types along with their short name, API group, Kind,
# and whether they are namespaced.
kubectl api-resources
-kubectl api-resources --namespaced=true
-kubectl api-resources -o name
-kubectl api-resources -o wide
-kubectl api-resources --verbs=list,get
+kubectl api-resources --namespaced='true'
+kubectl api-resources -o 'name'
+kubectl api-resources -o 'wide'
+kubectl api-resources --verbs='list,get'
# Show the documentation about resources or their fields.
-kubectl explain pods
-kubectl explain pods.spec.containers
+kubectl explain 'pods'
+kubectl explain 'pods.spec.containers'
# List and filter resources.
kubectl get pods
-kubectl get pod/coredns-845757d86-47np2 -n kube-system
+kubectl get 'pod/coredns-845757d86-47np2' -n 'kube-system'
kubectl get namespaces,pods --show-labels
-kubectl get services -A -o wide
-kubectl get rs --sort-by=.metadata.name
-kubectl get pv --sort-by=.spec.capacity.storage --no-headers
+kubectl get services -A -o 'wide'
+kubectl get rs --sort-by='.metadata.name'
+kubectl get pv --sort-by='.spec.capacity.storage' --no-headers
kubectl get po --sort-by='.status.containerStatuses[0].restartCount'
-kubectl get events --sort-by .metadata.creationTimestamp
-kubectl get pods --field-selector=status.phase=Running
+kubectl get events --sort-by '.metadata.creationTimestamp'
+kubectl get pods --field-selector='status.phase=Running'
kubectl get node -l='!node-role.kubernetes.io/master'
kubectl get replicasets -l 'environment in (prod, qa)'
kubectl get deploy --selector 'tier,tier notin (frontend)'
# Extract information from resources' definition.
-kubectl get deployment nginx -o yaml
-kubectl get cm kube-root-ca.crt -o jsonpath='{.data.ca\.crt}'
+kubectl get deployment 'nginx' -o 'yaml'
+kubectl get cm 'kube-root-ca.crt' -o jsonpath='{.data.ca\.crt}'
kubectl get po -o=jsonpath='{.items..metadata.name}'
-kubectl get po -l app=redis -o jsonpath='{.items[*].metadata.labels.version}'
+kubectl get po -l 'app=redis' -o jsonpath='{.items[*].metadata.labels.version}'
kubectl get nodes \
-o jsonpath='{.items[*].status.addresses[?(@.type=="ExternalIP")].address}'
@@ -126,9 +126,9 @@ kubectl get nodes \
| grep "Ready=True"
# List all secrets currently in use by a Pod.
-kubectl get pods -o json \
+kubectl get pods -o 'json' \
| jq '.items[].spec.containers[].env[]?.valueFrom.secretKeyRef.name' \
-| grep -v null | sort | uniq
+| grep -v 'null' | sort | uniq
# List the name of Pods belonging to a particular RC.
SELECTOR=${$(kubectl get rc my-rc --output=json | jq -j '.spec.selector | to_entries | .[] | "\(.key)=\(.value),"')%?} kubectl get pods -l=$SELECTOR \
@@ -144,17 +144,17 @@ kubectl get pods --all-namespaces \
# Produce a period-delimited tree of all keys returned for nodes.
# Helpful when trying to locate a specific key within a complex nested JSON
# structure.
-kubectl get nodes -o json | jq -c 'path(..)|[.[]|tostring]|join(".")'
+kubectl get nodes -o 'json' | jq -c 'path(..)|[.[]|tostring]|join(".")'
# Show detailed information about resources.
kubectl describe node pi
-kubectl describe deploy,rs,po -l app=redis
+kubectl describe deploy,rs,po -l 'app=redis'
# Create resources from manifests.
-kubectl apply -f manifest.yaml
-kubectl apply -f path/to/m1.yaml -f ./m2.yaml
-kubectl apply -f dir/
-kubectl apply -f https://git.io/vPieo
+kubectl apply -f 'manifest.yaml'
+kubectl apply -f 'path/to/m1.yaml' -f './m2.yaml'
+kubectl apply -f 'dir/'
+kubectl apply -f 'https://git.io/vPieo'
cat <<-EOF | kubectl apply -f -
apiVersion: v1
kind: Secret
@@ -171,18 +171,27 @@ EOF
kubectl diff -f ./manifest.yaml
# Start a Pod.
-kubectl run nginx --image nginx
-kubectl run busybox --rm -it --image=busybox -n keda -- sh
+kubectl run 'nginx' --image 'nginx'
+kubectl run 'busybox' --rm -it --image='busybox' -n 'keda' -- sh
+kubectl run 'alpine' --restart=Never -it --image 'alpine' -- sh
+kubectl run 'ephemeral' --image=registry.k8s.io/pause:3.1 --restart=Never
# Start a Pod and write its specs into a file.
-kubectl run nginx --image=nginx --dry-run=client -o yaml > pod.yaml
+kubectl run 'nginx' --image='nginx' --dry-run='client' -o 'yaml' > 'pod.yaml'
# Create a single instance deployment of 'nginx'.
-kubectl create deployment nginx --image=nginx
+kubectl create deployment 'nginx' --image 'nginx'
-# Start a Job using an existing Job as template
-kubectl create job backup-before-upgrade-13.6.2-to-13.9.2 \
- --from=cronjob.batch/backup -n gitlab
+# Start a Job printing "Hello World".
+kubectl create job 'hello' --image 'busybox:1.28' -- echo "Hello World"
+
+# Start a Job using an existing Job as template.
+kubectl create job 'backup-before-upgrade-13.6.2-to-13.9.2' \
+ --from=cronjob.batch/backup -n 'gitlab'
+
+# Start a CronJob printing "Hello World" every minute.
+kubectl create cronjob 'hello' --image=busybox:1.28 --schedule="*/1 * * * *" \
+ -- echo "Hello World"
# Wait for a pod to be 'ready'.
kubectl wait --for 'condition=ready' --timeout 120s \
@@ -210,7 +219,7 @@ kubectl rollout status -w deployment/frontend
kubectl rollout restart deployment/frontend
# Replace a Pod based on the JSON passed into stdin.
-cat pod.json | kubectl replace -f -
+cat 'pod.json' | kubectl replace -f -
# Force replacement, deletion and recreation (in this order) of resources.
# This Will cause a service outage.
@@ -225,8 +234,9 @@ kubectl get pod mypod -o yaml \
| sed 's/\(image: myimage\):.*$/\1:v4/' \
| kubectl replace -f -
-# Add Labels to resources.
-kubectl label pods nginx custom-name=awesome
+# Add Labels.
+kubectl label pods 'nginx' 'custom-name=awesome'
+kubectl label ns 'default' 'pod-security.kubernetes.io/enforce=privileged'
# Add Annotations.
kubectl annotate pods alpine icon-url=http://goo.gl/XXBTWq
@@ -336,6 +346,9 @@ kubectl taint nodes node1 key1=value1:NoSchedule-
# If a taint with that key and effect already exists, replace its value.
kubectl taint nodes foo dedicated=special-user:NoSchedule
+# Execute a privileged, debug container.
+kubectl debug -it 'node/docker-desktop' --image 'busybox:1.28'
+
# Mark Nodes as unschedulable.
kubectl cordon my-node
@@ -351,6 +364,14 @@ kubectl top node my-node
# Listen on port 5000 on the local machine and forward connections to port 6000
# of my-pod
kubectl port-forward my-pod 5000:6000
+
+# Show Containers' status, properties and capabilities from the inside.
+# Run the command from *inside* the container.
+cat /proc/1/status
+
+# Check a container's capabilities.
+# Run the command from *inside* the container.
+grep 'Cap' /proc/1/status
```
## Configuration
@@ -546,6 +567,8 @@ Verbosity | Description
- [Taints and Tolerations]
- [Commands reference]
- [Configure access to multiple clusters]
+- [Configure a Security Context for a Pod or Container]
+- [Enforce Pod Security Standards with Namespace Labels]
## Sources
@@ -554,14 +577,16 @@ Verbosity | Description
- [Run a replicated stateful application]
- [Accessing an application on Kubernetes in Docker]
-
+
[assigning pods to nodes]: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/
[cheatsheet]: https://kubernetes.io/docs/reference/kubectl/cheatsheet
[commands reference]: https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands
+[configure a security context for a pod or container]: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
[configure access to multiple clusters]: https://kubernetes.io/docs/tasks/access-application-cluster/configure-access-multiple-clusters/
+[enforce pod security standards with namespace labels]: https://kubernetes.io/docs/tasks/configure-pod-container/enforce-standards-namespace-labels/
[taints and tolerations]: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/
-
+
[accessing an application on kubernetes in docker]: https://medium.com/@lizrice/accessing-an-application-on-kubernetes-in-docker-1054d46b64b1
[run a replicated stateful application]: https://kubernetes.io/docs/tasks/run-application/run-replicated-stateful-application/
[run a single-instance stateful application]: https://kubernetes.io/docs/tasks/run-application/run-single-instance-stateful-application/
diff --git a/knowledge base/kubernetes.md b/knowledge base/kubernetes.md
index 17e5f32..bd3e165 100644
--- a/knowledge base/kubernetes.md
+++ b/knowledge base/kubernetes.md
@@ -6,18 +6,23 @@ Hosted by the [Cloud Native Computing Foundation][cncf].
1. [Composition](#composition)
1. [The control plane](#the-control-plane)
1. [kube-apiserver](#kube-apiserver)
- 2. [etcd](#etcd)
- 3. [kube-scheduler](#kube-scheduler)
- 4. [kube-controller-manager](#kube-controller-manager)
- 5. [cloud-controller-manager](#cloud-controller-manager)
- 2. [The worker Nodes](#the-worker-nodes)
+ 1. [etcd](#etcd)
+ 1. [kube-scheduler](#kube-scheduler)
+ 1. [kube-controller-manager](#kube-controller-manager)
+ 1. [cloud-controller-manager](#cloud-controller-manager)
+ 1. [The worker Nodes](#the-worker-nodes)
1. [kubelet](#kubelet)
- 2. [kube-proxy](#kube-proxy)
- 3. [Container runtime](#container-runtime)
- 3. [Addons](#addons)
-2. [The API](#the-api)
-3. [Managed Kubernetes Services](#managed-kubernetes-services)
-4. [Sources](#sources)
+ 1. [kube-proxy](#kube-proxy)
+ 1. [Container runtime](#container-runtime)
+ 1. [Addons](#addons)
+1. [The API](#the-api)
+1. [Managed Kubernetes Services](#managed-kubernetes-services)
+1. [Security](#security)
+ 1. [Highly privileged containers](#highly-privileged-containers)
+ 1. [Capabilities](#capabilities)
+ 1. [Privileged container vs privilege escalation](#privileged-container-vs-privilege-escalation)
+1. [Further readings](#further-readings)
+1. [Sources](#sources)
## Composition
@@ -107,7 +112,7 @@ The software that is responsible for running containers.
Kubernetes supports container runtimes like `containerd`, `CRI-O`, and any other implementation of the Kubernetes CRI (Container Runtime Interface).
-### Addons
+#### Addons
Addons use Kubernetes resources (_DaemonSet_, _Deployment_, etc) to implement cluster features, and as such namespaced resources for addons belong within the `kube-system` namespace.
@@ -131,15 +136,119 @@ The Kubernetes API can be extended:
Cloud providers offer managed versions.
+## Security
+
+### Highly privileged containers
+
+Some workloads (e.g. [ElasticSearch]) might need to change one or more system settings for performance, stability, or other issues.
+This is usually achieved by executing the change from a Container with high privileges, which has access to the Node's resources and breaks the isolation Containers are usually famous for. If such a Container is compromised, an attacker can use it to gain access to the underlying Node.
+
+To mitigate this, [Kubernetes introduced the design of a Security Context][security context design proposal].
+From this document:
+
+> A security context is a set of constraints that are applied to a Container in order to achieve the following goals (from the [Security design][Security Design Proposal]):
+>
+> - ensure a **clear isolation** between the Container and the underlying host it runs on;
+> - **limit** the ability of the Container to negatively impact the infrastructure or other Containers.
+>
+> [The main idea is that] **Containers should only be granted the access they need to perform their work**. The Security Context takes advantage of containerization features such as the ability to [add or remove capabilities][Runtime privilege and Linux capabilities in Docker containers] to give a process some privileges, but not all the privileges of the `root` user.
+
+#### Capabilities
+
+Adding capabilities to a Container is **not** making it _privileged_, **nor** allowing _privilege escalation_. It is just giving the Container the ability to write to specific files or devices depending on the given capability.
+
+This means having a capability assigned does **not** automatically make the Container able to wreak havoc on a Node, and this practice **can be a legitimate use** of this feature instead.
+
+From the feature's `man` page:
+
+> Linux divides the privileges traditionally associated with superuser into distinct units, known as _capabilities_, which can be independently enabled and disabled. Capabilities are a per-thread attribute.
+
+This also means a Container will be **limited** to its contents, plus the capabilities it has been assigned.
+
+Some capabilities are assigned to all Containers by default, while others (the ones which could cause more issues) need to be **explicitly** set using the Containers' `securityContext.capabilities.add` property.
+If a Container is _privileged_ (see [Privileged container vs privilege escalation](#privileged-container-vs-privilege-escalation)), it will have access to **all** the capabilities, regardless of which ones are explicitly assigned to it.
+
+Check:
+- [Linux capabilities], to see what capabilities can be assigned to a process **in a Linux system**;
+- [Runtime privilege and Linux capabilities in Docker containers] for the capabilities available **inside Kubernetes**, and
+- [Container capabilities in Kubernetes] for a handy table associating capabilities in Kubernetes to their Linux variant.
+
+#### Privileged container vs privilege escalation
+
+A _privileged container_ is very different from a _container leveraging privilege escalation_.
+
+A **privileged container** can do whatever a process running directly on the Node can.
+It is automatically assigned **all** [capabilities](#capabilities), and being `root` in this container is effectively being `root` on the Node it is running on.
+
+> For a Container to be _privileged_, its definition **requires the `securityContext.privileged` property set to `true`**.
+
+**Privilege escalation** allows **a process inside the Container** to gain more privileges than its parent process.
+The process will be able to assume `root`-like powers, but will only have access to the **assigned** [capabilities](#capabilities) and will generally have limited to no access to the Node, like any other Container.
+
+> For a Container to _leverage privilege escalation_, its definition **requires the `securityContext.allowPrivilegeEscalation` property**:
+>
+> - to **either** be set to `true`, or
+> - to **not be set** at all **if**:
+> - the Container is already privileged, or
+> - the Container has `SYS_ADMIN` capabilities.
+>
+> This property directly controls whether the [`no_new_privs`][No New Privileges Design Proposal] flag gets set on the Container's process.
+
+From the [design document for `no_new_privs`][No New Privileges Design Proposal]:
+
+> In Linux, the `execve` system call can grant more privileges to a newly-created process than its parent process. Considering security issues, since Linux kernel v3.5, there is a new flag named `no_new_privs` added to prevent those new privileges from being granted to the processes.
+>
+> `no_new_privs` is inherited across `fork`, `clone` and `execve` and **can not be unset**. With `no_new_privs` set, `execve` promises not to grant the privilege to do anything that could not have been done without the `execve` call.
+>
+> For more details about `no_new_privs`, please check the [Linux kernel documentation][no_new_privs linux kernel documentation].
+>
+> […]
+>
+> To recap, below is a table defining the default behavior at the pod security policy level and what can be set as a default with a pod security policy:
+>
+> | allowPrivilegeEscalation setting | uid = 0 or unset | uid != 0 | privileged/CAP_SYS_ADMIN |
+> | -------------------------------- | ------------------ | ------------------ | ------------------------ |
+> | nil | no_new_privs=true | no_new_privs=false | no_new_privs=false |
+> | false | no_new_privs=true | no_new_privs=true | no_new_privs=false |
+> | true | no_new_privs=false | no_new_privs=false | no_new_privs=false |
+
+## Further readings
+
+- Kubernetes' [security context design proposal]
+- Kubernetes' [No New Privileges Design Proposal]
+- [Linux kernel documentation about `no_new_privs`][no_new_privs linux kernel documentation]
+- [Linux capabilities]
+- [Runtime privilege and Linux capabilities in Docker containers]
+- [Container capabilities in Kubernetes]
+- [Configure a Security Context for a Pod or a Container], specifically the [Set capabilities for a Container] section
+- [Kubernetes SecurityContext Capabilities Explained]
+- [Best practices for pod security in Azure Kubernetes Service (AKS)]
+- [`kubectl`][kubectl]
+
## Sources
-- [Concepts]
+All the references in the [further readings](#further-readings) section, plus the following:
+
+- Kubernetes' [concepts]
[api deprecation policy]: https://kubernetes.io/docs/reference/using-api/deprecation-policy/
[concepts]: https://kubernetes.io/docs/concepts/
+[configure a security context for a pod or a container]: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
+[no new privileges design proposal]: https://github.com/kubernetes/design-proposals-archive/blob/main/auth/no-new-privs.md
+[security context design proposal]: https://github.com/kubernetes/design-proposals-archive/blob/main/auth/security_context.md
+[security design proposal]: https://github.com/kubernetes/design-proposals-archive/blob/main/auth/security.md
+[set capabilities for a container]: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-capabilities-for-a-container
+[kubectl]: kubectl.md
+[best practices for pod security in azure kubernetes service (aks)]: https://learn.microsoft.com/en-us/azure/aks/developer-best-practices-pod-security
[cncf]: https://www.cncf.io/
+[container capabilities in kubernetes]: https://unofficial-kubernetes.readthedocs.io/en/latest/concepts/policy/container-capabilities/
+[elasticsearch]: https://github.com/elastic/helm-charts/issues/689
+[kubernetes securitycontext capabilities explained]: https://www.golinuxcloud.com/kubernetes-securitycontext-capabilities/
+[linux capabilities]: https://man7.org/linux/man-pages/man7/capabilities.7.html
+[no_new_privs linux kernel documentation]: https://www.kernel.org/doc/Documentation/prctl/no_new_privs.txt
+[runtime privilege and linux capabilities in docker containers]: https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities