From eb250de82073fee19cf6b04ec9e9e038c05d5b7c Mon Sep 17 00:00:00 2001 From: Michele Cereda Date: Mon, 29 Jul 2024 18:55:25 +0200 Subject: [PATCH] feat(gitlab/runners/k8s): improve examples adding affinity and improving resource management --- .vscode/settings.json | 10 +++ examples/kubernetes/values.gitlab-runner.yaml | 72 +++++++++++++++++-- knowledge base/gitlab/runner.md | 59 +++++++++++++-- 3 files changed, 130 insertions(+), 11 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 7145940..2091f78 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -110,6 +110,7 @@ "clamscan", "cloudinit", "cloudquery", + "cloudzero", "cmds", "commitlint", "commitlintrc", @@ -142,6 +143,7 @@ "eeprom", "epel", "essid", + "execve", "fallocate", "fargate", "fastboot", @@ -167,6 +169,7 @@ "groupmask", "growpart", "hadolint", + "hazelcast", "hdparm", "healthcheck", "helmfile", @@ -203,6 +206,7 @@ "libexec", "lighttpd", "localdomain", + "lockfiles", "lucene", "luci", "lvextend", @@ -215,8 +219,10 @@ "mpiexec", "multiarch", "netcat", + "nfsmount", "nindent", "nmap", + "nodepool", "nproc", "nvme", "ocsp", @@ -248,6 +254,8 @@ "poweroff", "powerpipe", "powersave", + "preemptible", + "privs", "psql", "pstate", "pulumi", @@ -290,6 +298,7 @@ "subvolume", "swapfile", "swapon", + "sysctls", "sysrc", "systool", "taskfile", @@ -319,6 +328,7 @@ "usermod", "userspace", "vaultwarden", + "velero", "venv", "wazuh", "whalebrew", diff --git a/examples/kubernetes/values.gitlab-runner.yaml b/examples/kubernetes/values.gitlab-runner.yaml index 752b9dd..50596b2 100644 --- a/examples/kubernetes/values.gitlab-runner.yaml +++ b/examples/kubernetes/values.gitlab-runner.yaml @@ -12,15 +12,15 @@ image: image: gitlab-org/gitlab-runner imagePullPolicy: IfNotPresent -gitlabUrl: https://gitlab.company.com/ +gitlabUrl: https://gitlab.example.org/ unregisterRunners: true terminationGracePeriodSeconds: 3600 -concurrent: 10 +concurrent: 50 shutdown_timeout: 0 -checkInterval: 30 +checkInterval: 15 sessionServer: enabled: false @@ -51,6 +51,7 @@ runners: [[runners]] [runners.cache] + Type = "s3" Path = "runner/" Shared = true @@ -58,11 +59,11 @@ runners: [runners.cache.s3] ServerAddress = "s3.amazonaws.com" - BucketName = "company-ci" + BucketName = "example-ci" BucketLocation = "eu-west-1" [runners.kubernetes] - namespace = "{{.Release.Namespace}}" + image = "alpine" pull_policy = [ "if-not-present", @@ -74,6 +75,67 @@ runners: "never" ] + namespace = "{{.Release.Namespace}}" + + cpu_limit = "2" + cpu_limit_overwrite_max_allowed = "15" + cpu_request = "0" + cpu_request_overwrite_max_allowed = "15" + ephemeral_storage_limit = "512Mi" + ephemeral_storage_limit_overwrite_max_allowed = "49Gi" + ephemeral_storage_request = "0" + ephemeral_storage_request_overwrite_max_allowed = "49Gi" + helper_cpu_limit = "0.5" + helper_cpu_limit_overwrite_max_allowed = "0.9" + helper_cpu_request = "0" + helper_cpu_request_overwrite_max_allowed = "0.9" + helper_ephemeral_storage_limit = "64Mi" + helper_ephemeral_storage_limit_overwrite_max_allowed = "1Gi" + helper_ephemeral_storage_request = "0" + helper_ephemeral_storage_request_overwrite_max_allowed = "1Gi" + helper_memory_limit = "128Mi" + helper_memory_limit_overwrite_max_allowed = "1Gi" + helper_memory_request = "0" + helper_memory_request_overwrite_max_allowed = "1Gi" + memory_limit = "2Gi" + memory_limit_overwrite_max_allowed = "62Gi" + memory_request = "0" + memory_request_overwrite_max_allowed = "62Gi" + service_cpu_limit = "1" + service_cpu_limit_overwrite_max_allowed = "3.9" + service_cpu_request = "0" + service_cpu_request_overwrite_max_allowed = "3.9" + service_ephemeral_storage_limit_overwrite_max_allowed = "15Gi" + service_ephemeral_storage_request_overwrite_max_allowed = "15Gi" + service_memory_limit = "0.5Gi" + service_memory_limit_overwrite_max_allowed = "15.5Gi" + service_memory_request = "0" + service_memory_request_overwrite_max_allowed = "15.5Gi" + + [runners.kubernetes.affinity] + [runners.kubernetes.affinity.node_affinity] + + [runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution] + [[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms]] + [[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]] + key = "app.example.org" + operator = "In" + values = [ "gitlab-runner" ] + + [[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]] + weight = 1 + + [runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference] + [[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]] + key = "eks.amazonaws.com/capacityType" + operator = "In" + values = [ "ON_DEMAND" ] + + [runners.kubernetes.node_tolerations] + "app.example.org=gitlab-runner" = "NoSchedule" + "node-role.kubernetes.io/master" = "NoSchedule" + + configPath: "" name: "test-runner-on-k8s" secret: gitlab-runner-token diff --git a/knowledge base/gitlab/runner.md b/knowledge base/gitlab/runner.md index 8dbc8d4..d2cd352 100644 --- a/knowledge base/gitlab/runner.md +++ b/knowledge base/gitlab/runner.md @@ -151,7 +151,7 @@ Procedure: Example helm chart values ```yaml -gitlabUrl: https://gitlab.example.com/ +gitlabUrl: https://gitlab.example.org/ unregisterRunners: true concurrent: 20 checkInterval: 3 @@ -181,6 +181,16 @@ runners: namespace = "{{.Release.Namespace}}" name: "runner-on-k8s" secret: gitlab-runner-token +affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - ON_DEMAND tolerations: - key: app operator: Equal @@ -203,6 +213,25 @@ Gotchas: Improvements: +- Keep the manager pod on stable nodes. + +
+ + ```yaml + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - ON_DEMAND + ``` + +
+ - Dedicate specific nodes to runner executors.
Taint dedicated nodes and add tolerations and affinities to the runner's configuration. @@ -215,7 +244,6 @@ Improvements: [runners.kubernetes.node_selector] gitlab = "true" "kubernetes.io/arch" = "amd64" - "eks.amazonaws.com/capacityType" = "ON_DEMAND" [runners.kubernetes.affinity] [runners.kubernetes.affinity.node_affinity] @@ -225,6 +253,19 @@ Improvements: key = "app" operator = "In" values = [ "gitlab-runner" ] + [[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]] + key = "customLabel" + operator = "In" + values = [ "customValue" ] + + [[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]] + weight = 1 + + [runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference] + [[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]] + key = "eks.amazonaws.com/capacityType" + operator = "In" + values = [ "ON_DEMAND" ] [runners.kubernetes.node_tolerations] "app=gitlab-runner" = "NoSchedule" @@ -236,24 +277,30 @@ Improvements: -- Avoid massive resource consumption by defaulting to (very?) strict resource requests and limits. +- Avoid massive resource consumption by defaulting to (very?) strict resource limits and `0` request.
```toml [[runners]] [runners.kubernetes] - cpu_request = "0.1" + cpu_request = "0" cpu_limit = "2" - memory_request = "1Gi" + memory_request = "0" memory_limit = "2Gi" + ephemeral_storage_request = "0" ephemeral_storage_limit = "512Mi" + helper_cpu_request = "0" helper_cpu_limit = "0.5" + helper_memory_request = "0" helper_memory_limit = "128Mi" + helper_ephemeral_storage_request = "0" helper_ephemeral_storage_limit = "64Mi" + service_cpu_request = "0" service_cpu_limit = "1" + service_memory_request = "0" service_memory_limit = "0.5Gi" ``` @@ -317,7 +364,7 @@ concurrent = 40 [[runners]] name = "static-scaler" - url = "https://gitlab.example.com" + url = "https://gitlab.example.org" token = "abcdefghijklmnopqrst" executor = "docker+machine"