feat(gitlab/runners/k8s): improve examples by adding affinity and refining resource management

This commit is contained in:
Michele Cereda
2024-07-29 18:55:25 +02:00
parent 95190b36cc
commit eb250de820
3 changed files with 130 additions and 11 deletions

10
.vscode/settings.json vendored
View File

@@ -110,6 +110,7 @@
"clamscan",
"cloudinit",
"cloudquery",
"cloudzero",
"cmds",
"commitlint",
"commitlintrc",
@@ -142,6 +143,7 @@
"eeprom",
"epel",
"essid",
"execve",
"fallocate",
"fargate",
"fastboot",
@@ -167,6 +169,7 @@
"groupmask",
"growpart",
"hadolint",
"hazelcast",
"hdparm",
"healthcheck",
"helmfile",
@@ -203,6 +206,7 @@
"libexec",
"lighttpd",
"localdomain",
"lockfiles",
"lucene",
"luci",
"lvextend",
@@ -215,8 +219,10 @@
"mpiexec",
"multiarch",
"netcat",
"nfsmount",
"nindent",
"nmap",
"nodepool",
"nproc",
"nvme",
"ocsp",
@@ -248,6 +254,8 @@
"poweroff",
"powerpipe",
"powersave",
"preemptible",
"privs",
"psql",
"pstate",
"pulumi",
@@ -290,6 +298,7 @@
"subvolume",
"swapfile",
"swapon",
"sysctls",
"sysrc",
"systool",
"taskfile",
@@ -319,6 +328,7 @@
"usermod",
"userspace",
"vaultwarden",
"velero",
"venv",
"wazuh",
"whalebrew",

View File

@@ -12,15 +12,15 @@ image:
image: gitlab-org/gitlab-runner
imagePullPolicy: IfNotPresent
gitlabUrl: https://gitlab.company.com/
gitlabUrl: https://gitlab.example.org/
unregisterRunners: true
terminationGracePeriodSeconds: 3600
concurrent: 10
concurrent: 50
shutdown_timeout: 0
checkInterval: 30
checkInterval: 15
sessionServer:
enabled: false
@@ -51,6 +51,7 @@ runners:
[[runners]]
[runners.cache]
Type = "s3"
Path = "runner/"
Shared = true
@@ -58,11 +59,11 @@ runners:
[runners.cache.s3]
ServerAddress = "s3.amazonaws.com"
BucketName = "company-ci"
BucketName = "example-ci"
BucketLocation = "eu-west-1"
[runners.kubernetes]
namespace = "{{.Release.Namespace}}"
image = "alpine"
pull_policy = [
"if-not-present",
@@ -74,6 +75,67 @@ runners:
"never"
]
namespace = "{{.Release.Namespace}}"
cpu_limit = "2"
cpu_limit_overwrite_max_allowed = "15"
cpu_request = "0"
cpu_request_overwrite_max_allowed = "15"
ephemeral_storage_limit = "512Mi"
ephemeral_storage_limit_overwrite_max_allowed = "49Gi"
ephemeral_storage_request = "0"
ephemeral_storage_request_overwrite_max_allowed = "49Gi"
helper_cpu_limit = "0.5"
helper_cpu_limit_overwrite_max_allowed = "0.9"
helper_cpu_request = "0"
helper_cpu_request_overwrite_max_allowed = "0.9"
helper_ephemeral_storage_limit = "64Mi"
helper_ephemeral_storage_limit_overwrite_max_allowed = "1Gi"
helper_ephemeral_storage_request = "0"
helper_ephemeral_storage_request_overwrite_max_allowed = "1Gi"
helper_memory_limit = "128Mi"
helper_memory_limit_overwrite_max_allowed = "1Gi"
helper_memory_request = "0"
helper_memory_request_overwrite_max_allowed = "1Gi"
memory_limit = "2Gi"
memory_limit_overwrite_max_allowed = "62Gi"
memory_request = "0"
memory_request_overwrite_max_allowed = "62Gi"
service_cpu_limit = "1"
service_cpu_limit_overwrite_max_allowed = "3.9"
service_cpu_request = "0"
service_cpu_request_overwrite_max_allowed = "3.9"
service_ephemeral_storage_limit_overwrite_max_allowed = "15Gi"
service_ephemeral_storage_request_overwrite_max_allowed = "15Gi"
service_memory_limit = "0.5Gi"
service_memory_limit_overwrite_max_allowed = "15.5Gi"
service_memory_request = "0"
service_memory_request_overwrite_max_allowed = "15.5Gi"
[runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity]
[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms]]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "app.example.org"
operator = "In"
values = [ "gitlab-runner" ]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "eks.amazonaws.com/capacityType"
operator = "In"
values = [ "ON_DEMAND" ]
[runners.kubernetes.node_tolerations]
"app.example.org=gitlab-runner" = "NoSchedule"
"node-role.kubernetes.io/master" = "NoSchedule"
configPath: ""
name: "test-runner-on-k8s"
secret: gitlab-runner-token

View File

@@ -151,7 +151,7 @@ Procedure:
<summary>Example helm chart values</summary>
```yaml
gitlabUrl: https://gitlab.example.com/
gitlabUrl: https://gitlab.example.org/
unregisterRunners: true
concurrent: 20
checkInterval: 3
@@ -181,6 +181,16 @@ runners:
namespace = "{{.Release.Namespace}}"
name: "runner-on-k8s"
secret: gitlab-runner-token
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
preference:
matchExpressions:
- key: eks.amazonaws.com/capacityType
operator: In
values:
- ON_DEMAND
tolerations:
- key: app
operator: Equal
@@ -203,6 +213,25 @@ Gotchas:
Improvements:
- Keep the manager pod on stable nodes.
<details style="margin-bottom: 1em;">
```yaml
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
preference:
matchExpressions:
- key: eks.amazonaws.com/capacityType
operator: In
values:
- ON_DEMAND
```
</details>
- Dedicate specific nodes to runner executors.<br/>
Taint dedicated nodes and add tolerations and affinities to the runner's configuration.
@@ -215,7 +244,6 @@ Improvements:
[runners.kubernetes.node_selector]
gitlab = "true"
"kubernetes.io/arch" = "amd64"
"eks.amazonaws.com/capacityType" = "ON_DEMAND"
[runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity]
@@ -225,6 +253,19 @@ Improvements:
key = "app"
operator = "In"
values = [ "gitlab-runner" ]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "customLabel"
operator = "In"
values = [ "customValue" ]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "eks.amazonaws.com/capacityType"
operator = "In"
values = [ "ON_DEMAND" ]
[runners.kubernetes.node_tolerations]
"app=gitlab-runner" = "NoSchedule"
@@ -236,24 +277,30 @@ Improvements:
</details>
- Avoid massive resource consumption by defaulting to (very?) strict resource requests and limits.
- Avoid massive resource consumption by defaulting to (very?) strict resource limits and `0` requests.
<details style="margin-bottom: 1em;">
```toml
[[runners]]
[runners.kubernetes]
cpu_request = "0.1"
cpu_request = "0"
cpu_limit = "2"
memory_request = "1Gi"
memory_request = "0"
memory_limit = "2Gi"
ephemeral_storage_request = "0"
ephemeral_storage_limit = "512Mi"
helper_cpu_request = "0"
helper_cpu_limit = "0.5"
helper_memory_request = "0"
helper_memory_limit = "128Mi"
helper_ephemeral_storage_request = "0"
helper_ephemeral_storage_limit = "64Mi"
service_cpu_request = "0"
service_cpu_limit = "1"
service_memory_request = "0"
service_memory_limit = "0.5Gi"
```
@@ -317,7 +364,7 @@ concurrent = 40
[[runners]]
name = "static-scaler"
url = "https://gitlab.example.com"
url = "https://gitlab.example.org"
token = "abcdefghijklmnopqrst"
executor = "docker+machine"