feat(gitlab/runners/k8s): improve examples by adding affinity and improving resource management

This commit is contained in:
Michele Cereda
2024-07-29 18:55:25 +02:00
parent 95190b36cc
commit eb250de820
3 changed files with 130 additions and 11 deletions

10
.vscode/settings.json vendored
View File

@@ -110,6 +110,7 @@
"clamscan", "clamscan",
"cloudinit", "cloudinit",
"cloudquery", "cloudquery",
"cloudzero",
"cmds", "cmds",
"commitlint", "commitlint",
"commitlintrc", "commitlintrc",
@@ -142,6 +143,7 @@
"eeprom", "eeprom",
"epel", "epel",
"essid", "essid",
"execve",
"fallocate", "fallocate",
"fargate", "fargate",
"fastboot", "fastboot",
@@ -167,6 +169,7 @@
"groupmask", "groupmask",
"growpart", "growpart",
"hadolint", "hadolint",
"hazelcast",
"hdparm", "hdparm",
"healthcheck", "healthcheck",
"helmfile", "helmfile",
@@ -203,6 +206,7 @@
"libexec", "libexec",
"lighttpd", "lighttpd",
"localdomain", "localdomain",
"lockfiles",
"lucene", "lucene",
"luci", "luci",
"lvextend", "lvextend",
@@ -215,8 +219,10 @@
"mpiexec", "mpiexec",
"multiarch", "multiarch",
"netcat", "netcat",
"nfsmount",
"nindent", "nindent",
"nmap", "nmap",
"nodepool",
"nproc", "nproc",
"nvme", "nvme",
"ocsp", "ocsp",
@@ -248,6 +254,8 @@
"poweroff", "poweroff",
"powerpipe", "powerpipe",
"powersave", "powersave",
"preemptible",
"privs",
"psql", "psql",
"pstate", "pstate",
"pulumi", "pulumi",
@@ -290,6 +298,7 @@
"subvolume", "subvolume",
"swapfile", "swapfile",
"swapon", "swapon",
"sysctls",
"sysrc", "sysrc",
"systool", "systool",
"taskfile", "taskfile",
@@ -319,6 +328,7 @@
"usermod", "usermod",
"userspace", "userspace",
"vaultwarden", "vaultwarden",
"velero",
"venv", "venv",
"wazuh", "wazuh",
"whalebrew", "whalebrew",

View File

@@ -12,15 +12,15 @@ image:
image: gitlab-org/gitlab-runner image: gitlab-org/gitlab-runner
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
gitlabUrl: https://gitlab.company.com/ gitlabUrl: https://gitlab.example.org/
unregisterRunners: true unregisterRunners: true
terminationGracePeriodSeconds: 3600 terminationGracePeriodSeconds: 3600
concurrent: 10 concurrent: 50
shutdown_timeout: 0 shutdown_timeout: 0
checkInterval: 30 checkInterval: 15
sessionServer: sessionServer:
enabled: false enabled: false
@@ -51,6 +51,7 @@ runners:
[[runners]] [[runners]]
[runners.cache] [runners.cache]
Type = "s3" Type = "s3"
Path = "runner/" Path = "runner/"
Shared = true Shared = true
@@ -58,11 +59,11 @@ runners:
[runners.cache.s3] [runners.cache.s3]
ServerAddress = "s3.amazonaws.com" ServerAddress = "s3.amazonaws.com"
BucketName = "company-ci" BucketName = "example-ci"
BucketLocation = "eu-west-1" BucketLocation = "eu-west-1"
[runners.kubernetes] [runners.kubernetes]
namespace = "{{.Release.Namespace}}"
image = "alpine" image = "alpine"
pull_policy = [ pull_policy = [
"if-not-present", "if-not-present",
@@ -74,6 +75,67 @@ runners:
"never" "never"
] ]
namespace = "{{.Release.Namespace}}"
cpu_limit = "2"
cpu_limit_overwrite_max_allowed = "15"
cpu_request = "0"
cpu_request_overwrite_max_allowed = "15"
ephemeral_storage_limit = "512Mi"
ephemeral_storage_limit_overwrite_max_allowed = "49Gi"
ephemeral_storage_request = "0"
ephemeral_storage_request_overwrite_max_allowed = "49Gi"
helper_cpu_limit = "0.5"
helper_cpu_limit_overwrite_max_allowed = "0.9"
helper_cpu_request = "0"
helper_cpu_request_overwrite_max_allowed = "0.9"
helper_ephemeral_storage_limit = "64Mi"
helper_ephemeral_storage_limit_overwrite_max_allowed = "1Gi"
helper_ephemeral_storage_request = "0"
helper_ephemeral_storage_request_overwrite_max_allowed = "1Gi"
helper_memory_limit = "128Mi"
helper_memory_limit_overwrite_max_allowed = "1Gi"
helper_memory_request = "0"
helper_memory_request_overwrite_max_allowed = "1Gi"
memory_limit = "2Gi"
memory_limit_overwrite_max_allowed = "62Gi"
memory_request = "0"
memory_request_overwrite_max_allowed = "62Gi"
service_cpu_limit = "1"
service_cpu_limit_overwrite_max_allowed = "3.9"
service_cpu_request = "0"
service_cpu_request_overwrite_max_allowed = "3.9"
service_ephemeral_storage_limit_overwrite_max_allowed = "15Gi"
service_ephemeral_storage_request_overwrite_max_allowed = "15Gi"
service_memory_limit = "0.5Gi"
service_memory_limit_overwrite_max_allowed = "15.5Gi"
service_memory_request = "0"
service_memory_request_overwrite_max_allowed = "15.5Gi"
[runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity]
[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms]]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "app.example.org"
operator = "In"
values = [ "gitlab-runner" ]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "eks.amazonaws.com/capacityType"
operator = "In"
values = [ "ON_DEMAND" ]
[runners.kubernetes.node_tolerations]
"app.example.org=gitlab-runner" = "NoSchedule"
"node-role.kubernetes.io/master" = "NoSchedule"
configPath: "" configPath: ""
name: "test-runner-on-k8s" name: "test-runner-on-k8s"
secret: gitlab-runner-token secret: gitlab-runner-token

View File

@@ -151,7 +151,7 @@ Procedure:
<summary>Example helm chart values</summary> <summary>Example helm chart values</summary>
```yaml ```yaml
gitlabUrl: https://gitlab.example.com/ gitlabUrl: https://gitlab.example.org/
unregisterRunners: true unregisterRunners: true
concurrent: 20 concurrent: 20
checkInterval: 3 checkInterval: 3
@@ -181,6 +181,16 @@ runners:
namespace = "{{.Release.Namespace}}" namespace = "{{.Release.Namespace}}"
name: "runner-on-k8s" name: "runner-on-k8s"
secret: gitlab-runner-token secret: gitlab-runner-token
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
preference:
matchExpressions:
- key: eks.amazonaws.com/capacityType
operator: In
values:
- ON_DEMAND
tolerations: tolerations:
- key: app - key: app
operator: Equal operator: Equal
@@ -203,6 +213,25 @@ Gotchas:
Improvements: Improvements:
- Keep the manager pod on stable nodes.
<details style="margin-bottom: 1em;">
```yaml
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
preference:
matchExpressions:
- key: eks.amazonaws.com/capacityType
operator: In
values:
- ON_DEMAND
```
</details>
- Dedicate specific nodes to runner executors.<br/> - Dedicate specific nodes to runner executors.<br/>
Taint dedicated nodes and add tolerations and affinities to the runner's configuration. Taint dedicated nodes and add tolerations and affinities to the runner's configuration.
@@ -215,7 +244,6 @@ Improvements:
[runners.kubernetes.node_selector] [runners.kubernetes.node_selector]
gitlab = "true" gitlab = "true"
"kubernetes.io/arch" = "amd64" "kubernetes.io/arch" = "amd64"
"eks.amazonaws.com/capacityType" = "ON_DEMAND"
[runners.kubernetes.affinity] [runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity] [runners.kubernetes.affinity.node_affinity]
@@ -225,6 +253,19 @@ Improvements:
key = "app" key = "app"
operator = "In" operator = "In"
values = [ "gitlab-runner" ] values = [ "gitlab-runner" ]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "customLabel"
operator = "In"
values = [ "customValue" ]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "eks.amazonaws.com/capacityType"
operator = "In"
values = [ "ON_DEMAND" ]
[runners.kubernetes.node_tolerations] [runners.kubernetes.node_tolerations]
"app=gitlab-runner" = "NoSchedule" "app=gitlab-runner" = "NoSchedule"
@@ -236,24 +277,30 @@ Improvements:
</details> </details>
- Avoid massive resource consumption by defaulting to (very?) strict resource requests and limits. - Avoid massive resource consumption by defaulting to (very?) strict resource limits and `0` requests.
<details style="margin-bottom: 1em;"> <details style="margin-bottom: 1em;">
```toml ```toml
[[runners]] [[runners]]
[runners.kubernetes] [runners.kubernetes]
cpu_request = "0.1" cpu_request = "0"
cpu_limit = "2" cpu_limit = "2"
memory_request = "1Gi" memory_request = "0"
memory_limit = "2Gi" memory_limit = "2Gi"
ephemeral_storage_request = "0"
ephemeral_storage_limit = "512Mi" ephemeral_storage_limit = "512Mi"
helper_cpu_request = "0"
helper_cpu_limit = "0.5" helper_cpu_limit = "0.5"
helper_memory_request = "0"
helper_memory_limit = "128Mi" helper_memory_limit = "128Mi"
helper_ephemeral_storage_request = "0"
helper_ephemeral_storage_limit = "64Mi" helper_ephemeral_storage_limit = "64Mi"
service_cpu_request = "0"
service_cpu_limit = "1" service_cpu_limit = "1"
service_memory_request = "0"
service_memory_limit = "0.5Gi" service_memory_limit = "0.5Gi"
``` ```
@@ -317,7 +364,7 @@ concurrent = 40
[[runners]] [[runners]]
name = "static-scaler" name = "static-scaler"
url = "https://gitlab.example.com" url = "https://gitlab.example.org"
token = "abcdefghijklmnopqrst" token = "abcdefghijklmnopqrst"
executor = "docker+machine" executor = "docker+machine"