chore(ssm): review ansible integration
@@ -164,34 +164,68 @@ aws ssm describe-instance-associations-status --instance-id 'instance-id'

## Integrate with Ansible

Create a dynamic inventory whose name ends with `aws_ec2.yml` (e.g. `test.aws_ec2.yml` or simply `aws_ec2.yml`).<br/>
It needs to be named like that to be found by the
['community.aws.aws_ssm' connection plugin][community.aws.aws_ssm connection].

Refer to the [amazon.aws.aws_ec2 inventory] for more information about the file specifications.

> [!important]
> Even if this is a YAML file, it must **not** start with '---'.<br/>
> Ansible will **fail** parsing it in this case.

```yml
# File: 'aws_ec2.yml'.
plugin: amazon.aws.aws_ec2
regions:
  - eu-north-1
include_filters:
  - # exclude instances that are not running, which are inoperable
    instance-state-name: running
exclude_filters:
  - tag-key:
      - aws:eks:cluster-name  # skip EKS nodes, since they are managed in their own way
  - # skip GitLab Runners, since they are volatile and managed in their own way
    tag:Application:
      - GitLab
    tag:Component:
      - Runner
use_ssm_inventory:
  # requires 'ssm:GetInventory' permissions on 'arn:aws:ssm:<region>:<account-id>:*'
  # this makes the sync fail miserably if configured on AWX inventories
  true
hostnames:
  - instance-id
    # acts as keyword to use the instances' 'InstanceId' attribute
    # use 'private-ip-address' to use the instances' 'PrivateIpAddress' attribute instead
    # or any option in <https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-instances.html#options> really
keyed_groups:
  # add hosts to '<prefix>_<value>' groups for each aws_ec2 host's matching attribute
  # e.g.: 'arch_x86_64', 'os_Name_Amazon_Linux', 'tag_Name_GitLab_Server'
  - key: architecture
    prefix: arch
  - key: ssm_inventory.platform_name
    prefix: os_Name
  - key: ssm_inventory.platform_type
    prefix: os_Type
  - key: ssm_inventory.platform_version
    prefix: os_Version
  # - key: tags  # would create a group per each tag value; prefer limiting groups to the useful ones
  #   prefix: tag
  - key: tags.Team
    prefix: tag_Team
  - key: tags.Environment
    prefix: tag_Environment
  - key: tags.Application
    prefix: tag_Application
  - key: tags.Component
    prefix: tag_Component
  - key: tags.Name
    prefix: tag_Name
compose:
  # add extra host variables
  # use non-jinja values (e.g. strings) by wrapping them in two sets of quotes
  # if using awx, prefer keeping double quotes external (e.g. "'something'") as it just looks better in the ui
  ansible_connection: "'aws_ssm'"
  ansible_aws_ssm_region: "'eu-north-1'"
  ansible_aws_ssm_timeout: "'300'"
```
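
Assuming valid AWS credentials in the environment and the `amazon.aws` collection (plus `boto3`/`botocore`) on the
controller, one can quickly check what the inventory resolves to:

```sh
ansible-inventory -i 'aws_ec2.yml' --graph
ansible-inventory -i 'aws_ec2.yml' --list   # also shows the variables set by 'compose'
```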
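
The `use_ssm_inventory` option above needs the `ssm:GetInventory` permission mentioned in its comment. A minimal
sketch of the corresponding IAM policy statement (region and account ID are placeholders):

```json
{
  "Effect": "Allow",
  "Action": "ssm:GetInventory",
  "Resource": "arn:aws:ssm:<region>:<account-id>:*"
}
```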

Pitfalls:

@@ -199,15 +233,15 @@ Pitfalls:

- One **shall not use the `remote_user` connection option**, as it is not supported by the plugin.<br/>
  From the [plugin notes][aws_ssm connection plugin notes]:

  > The `community.aws.aws_ssm` connection plugin does not support using the `remote_user` and `ansible_user`
  > variables to configure the remote user. The `become_user` parameter should be used to configure which user to run
  > commands as. Remote commands will often default to running as the `ssm-agent` user, however this will also depend
  > on how SSM has been configured.

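  A minimal sketch of setting the user through `become_user` instead (the `ubuntu` user and the S3 bucket name are
  placeholders; the plugin needs an S3 bucket for file transfers):

  ```yaml
  - hosts: all
    vars:
      ansible_connection: community.aws.aws_ssm
      ansible_aws_ssm_bucket_name: my-file-transfer-bucket  # placeholder
    become: true
    become_user: ubuntu  # instead of 'remote_user' / 'ansible_user'
    tasks:
      - ansible.builtin.command: whoami
  ```
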
- SSM sessions' duration is limited by SSM's _idle session timeout_ setting.<br/>
  That might impact tasks that need to run for more than said duration.

  <details style="padding: 0 0 1rem 1rem">

  Some modules (e.g.: `community.postgresql.postgresql_db`) got their session terminated and SSM retried the task,
  killing and restarting the running process.<br/>

@@ -216,27 +250,27 @@ Pitfalls:

  </details>

  Consider extending the SSM idle session timeout setting, or using `async` tasks to circumvent this issue.<br/>
  Mind that `async` tasks come with their own SSM caveats.

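  The idle session timeout is part of the account's Session Manager preferences, which are stored in the
  `SSM-SessionManagerRunShell` document. A hedged sketch for checking and updating it from the CLI (the local JSON
  file name is a placeholder):

  ```sh
  # show the current preferences, 'idleSessionTimeout' included (value in minutes)
  aws ssm get-document --name 'SSM-SessionManagerRunShell' --query 'Content' --output 'text'

  # update the preferences after editing the exported JSON locally
  aws ssm update-document --name 'SSM-SessionManagerRunShell' --document-version '$LATEST' \
    --content 'file://session-manager-preferences.json'
  ```
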
- Since [SSM starts shell sessions under `/usr/bin`][gotchas], one must explicitly set Ansible's temporary directory to
  a folder the remote user can write to ([source][ansible temp dir change]).

  <details style="padding: 0 0 1rem 1rem">

  ```sh
  ANSIBLE_REMOTE_TMP="/tmp/.ansible/tmp" ansible…
  ```

  ```ini
  # file: ansible.cfg
  remote_tmp=/tmp/.ansible/tmp
  ```

  ```diff
   - hosts: all
  +  vars:
  +    ansible_remote_tmp: /tmp/.ansible/tmp
     tasks: …
  ```

@@ -248,7 +282,7 @@ Pitfalls:

- In similar fashion to the point above, SSM might mess up the directory used by `async` tasks.<br/>
  To avoid this, set it to a folder the remote user can write to.

  <details style="padding: 0 0 1rem 1rem">

  ```sh
  ANSIBLE_ASYNC_DIR="/tmp/.ansible-${USER}/async" ansible…
  ```

@@ -262,24 +296,25 @@ Pitfalls:

  ```diff
   - hosts: all
  +  vars:
  +    ansible_async_dir: /tmp/.ansible/async
     tasks: …
  ```

  </details>

- When using `async` tasks, SSM will fire the task and disconnect.<br/>
  This made the task **fail** at some point. Even so, the process will still run on the target host.

  <details style="padding: 0 0 1rem 1rem">

  ```json
  {
    "failed": 0,
    "started": 1,
    "finished": 0,
    "ansible_job_id": "j604343782826.4885",
    "results_file": "/tmp/.ansible/async/j604343782826.4885",
    "_ansible_suppress_tmpdir_delete": true
  }
  ```

@@ -288,43 +323,46 @@ Pitfalls:

  Fire these tasks with `poll` set to `0` and forcing a specific failure test.<br/>
  Then, use a different task to check up on them.

  > [!important]
  > When checking up tasks with `ansible.builtin.async_status`, SSM will use a single connection.<br/>
  > Consider keeping alive said connection until the end of the task.
  >
  > FIXME: check. This seems to not happen anymore.

  <details style="padding: 0 0 1rem 1rem">
  <summary>Example</summary>

  ```yaml
  - name: Dump a PostgreSQL DB from an RDS instance
    hosts: all
    vars:
      ansible_connection: amazon.aws.aws_ssm
      ansible_remote_tmp: /tmp/.ansible/tmp    #-- see pitfalls (ssm starts sessions in '/usr/bin')
      ansible_async_dir: /tmp/.ansible/async   #-- see pitfalls (ssm starts sessions in '/usr/bin')
      pg_dump_max_wait_in_seconds: "{{ 60 * 60 * 2 }}"  #-- wait up to 2 hours (60s * 60m * 2h)
      pg_dump_check_delay_in_seconds: 60                #-- avoid overloading the ssm agent with sessions
      pg_dump_check_retries: #-- max_wait/delay
        "{{ (pg_dump_max_wait_in_seconds | int) / (pg_dump_check_delay_in_seconds | int) }}"
    tasks:
      - name: Dump the DB from the RDS instance
        community.postgresql.postgresql_db: { … }
        async: "{{ pg_dump_max_wait_in_seconds | int }}"
        poll: 0  #-- fire and forget; ssm would not allow self-checking anyways
        register: pg_dump_task_execution  #-- expected: { "failed": 0, "started": 1, "finished": 0 }
        changed_when:
          - pg_dump_task_execution.started == 1
          - pg_dump_task_execution.failed == 0
        failed_when: pg_dump_task_execution.failed == 1  #-- specify the failure yourself
      - name: Check on the PG dump task
        vars:
          ansible_aws_ssm_timeout: "{{ pg_dump_max_wait_in_seconds }}"  #-- keep the connection active the whole time
        ansible.builtin.async_status:
          jid: "{{ pg_dump_task_execution.ansible_job_id }}"
        register: pg_dump_task_execution_result
        until: pg_dump_task_execution_result.finished
        retries: "{{ pg_dump_check_retries | int }}"  #-- mind the argument's type
        delay: "{{ pg_dump_check_delay_in_seconds | int }}"  #-- mind the argument's type
  ```

  </details>

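  With the dynamic inventory above, such a playbook can then be run against one of the generated groups. A sketch
  (the playbook file name and the group are placeholders):

  ```sh
  ansible-playbook -i 'aws_ec2.yml' -l 'tag_Environment_production' 'dump-db.yml'
  ```
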
@@ -15,18 +15,25 @@ include_filters:

  - # exclude instances that are not running, which are inoperable
    instance-state-name: running
exclude_filters:
  - tag-key:
      - aws:eks:cluster-name  # skip EKS nodes, since they are managed in their own way
  - # skip GitLab Runners, since they are volatile and managed in their own way
    tag:Application:
      - GitLab
    tag:Component:
      - Runner
use_ssm_inventory:
  # requires 'ssm:GetInventory' permissions on 'arn:aws:ssm:<region>:<account-id>:*'
  # this makes the sync fail miserably if configured on AWX inventories
  true
hostnames:
  - instance-id
    # acts as keyword to use the instances' 'InstanceId' attribute
    # use 'private-ip-address' to use the instances' 'PrivateIpAddress' attribute instead
    # or any option in <https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-instances.html#options> really
keyed_groups:
  # add hosts to '<prefix>_<value>' groups for each aws_ec2 host's matching attribute
  # e.g.: 'arch_x86_64', 'os_Name_Amazon_Linux', 'tag_Name_GitLab_Server'
  - key: architecture
    prefix: arch
  - key: ssm_inventory.platform_name

@@ -48,6 +55,7 @@ keyed_groups:

  - key: tags.Name
    prefix: tag_Name
compose:
  # add extra host variables
  # use non-jinja values (e.g. strings) by wrapping them in two sets of quotes
  # if using awx, prefer keeping double quotes external (e.g. "'something'") as it just looks better in the ui
  ansible_connection: "'aws_ssm'"