From 441e90a04ece824bb9fa9c37b7bcec48001480ee Mon Sep 17 00:00:00 2001 From: Michele Cereda Date: Thu, 10 Apr 2025 00:15:59 +0200 Subject: [PATCH] refactor(kb/peerdb): add operations --- knowledge base/peerdb.md | 402 +++++++++++++++++++++++++++-------- knowledge base/postgresql.md | 2 + 2 files changed, 320 insertions(+), 84 deletions(-) diff --git a/knowledge base/peerdb.md b/knowledge base/peerdb.md index c3812d1..f17d291 100644 --- a/knowledge base/peerdb.md +++ b/knowledge base/peerdb.md @@ -3,6 +3,10 @@ Fast, simple, and cost effective Postgres replication. 1. [TL;DR](#tldr) +1. [Peers](#peers) +1. [Mirrors](#mirrors) +1. [Alerts](#alerts) +1. [Gotchas](#gotchas) 1. [Further readings](#further-readings) 1. [Sources](#sources) @@ -20,7 +24,299 @@ Fast, simple, and cost effective Postgres replication.
- Gotchas + Setup + +```sh +git clone 'https://github.com/PeerDB-io/peerdb.git' \ +&& docker compose -f 'peerdb/docker-compose.yml' up -d +``` + +
+ +
+ Usage + +```sh +# Connect in SQL mode. +psql 'host=localhost port=9900 password=peerdb' +psql 'postgresql://peerdb.example.org:9900/?password=peerdb' + +# Use the REST APIs. +curl -fsS --url 'http://localhost:3000/api/v1/peers/list' --request 'GET' \ + --header "Authorization: Basic $(printf '%s' ':' 'your password here' | base64)" +curl -fsS --url 'http://localhost:3000/api/v1/peers/create' --request 'POST' \ + --header "Authorization: Basic $(printf '%s' ':' 'your password here' | base64)" \ + --header 'Content-Type: application/json' \ + --data '{ … }' +``` + +
+ +
+ Real world use cases + +```sh +# List peers. +psql "host=localhost port=9900 password=$(gopass show -o 'peerdb/instance')" -c "SELECT id, name, type FROM peers;" +curl -fsS --url 'http://localhost:3000/api/v1/peers/list' \ + -H "Authorization: Basic $(gopass show -o 'peerdb/instance' | xargs printf '%s' ':' | base64)" +``` + +
+ +## Peers + +Peers are connection settings to databases that PeerDB can operate upon. + +_Source_ PostgreSQL peers **require** logical replication to be enabled. + +
+ +```sql +-- Check settings +sourceDb=> SELECT name,setting FROM pg_settings WHERE name IN ('wal_level','rds.logical_replication'); + name | setting +-------------------------+--------- + rds.logical_replication | on + wal_level | logical +(2 rows) +``` + +```sql +-- Configure sources +ALTER SYSTEM SET wal_level = logical; +ALTER SYSTEM SET max_wal_senders = 10; +ALTER SYSTEM SET max_replication_slots = 10; +``` + +
+ +Operations: + +
+ List + +```sql +SELECT id, name, type FROM peers; +``` + +```plaintext +GET /api/v1/peers/list +``` + +
+ +
+ Create or update + +```sql +CREATE PEER IF NOT EXISTS some_postgresql_peer +FROM POSTGRES +WITH ( + host='pg.example.org', + port='5432', + database='postgres', + user='postgres', + password='password' +); +``` + +| Peer type | `peer.type` attribute | Configuration attribute | +| ---------- | --------------------- | ----------------------- | +| ClickHouse | `8` | `clickhouse_config` | +| Kafka | `9` | `kafka_config` | +| PostgreSQL | `3` or `'POSTGRES'` | `postgres_config` | + +> The optional `"allow_update": true` attribute in the API seems to do **absolutely nothing** as of the time of writing. + +```plaintext +POST /api/v1/peers/create +{ + "allow_update": true, + "peer": { + "name": "some_postgresql_peer", + "type": "POSTGRES", + "postgres_config": { + "host": "pg.example.org", + "port": "5432", + "database": "postgres", + "user": "postgres", + "password": "password" + } + } +} +``` + +
+ +
+ Delete + +```sql +DELETE FROM peers WHERE name = 'some_postgresql_peer'; +``` + +
+ +## Mirrors + +Mirrors can be in the following states: + +| State | Returned string | Description | +| ---------- | ------------------- | ----------------------------------------------------------------------------------------------------- | +| Setup | `STATUS_SETUP` | The mirror is creating the target tables and metadata tables | +| Snapshot | `STATUS_SNAPSHOT` | The mirror is currently performing the initial snapshot of the tables defined in the mapping | +| Running | `STATUS_RUNNING` | The mirror has completed the initial snapshot, and is in its CDC phase | +| Pausing | `STATUS_PAUSING` | The mirror is in its CDC phase, and is in the process of pausing | +| Paused | `STATUS_PAUSED` | The mirror is in its CDC phase, and is paused | +| Terminated | `STATUS_TERMINATED` | The mirror has been deleted/terminated | +| Unknown | `STATUS_UNKNOWN` | The mirror is not found in PeerDB's catalog, or its status cannot be obtained due to some other issue | + +Mirrors using _PostgreSQL_ peers as sources create [replication slots] in the source DB to get changes from. + +Operations: + +
+ List + +```plaintext +GET /api/v1/mirrors/list +``` + +
+ +
+ Create + +| Field | Type | Required | Default | Notes | +| --------------------------------------------- | --------------- | -------- | -------------------- | ------------------------------------------------ | +| `flow_job_name` | string | yes | | name of the mirror | +| `source_name` | string | yes | | name of the source peer | +| `destination_name` | string | yes | | name of the destination peer | +| `table_mappings` | array | yes | | | +| `table_mappings.source_table_identifier` | string | yes | | source schema and table | +| `table_mappings.destination_table_identifier` | string | yes | | destination schema and table | +| `table_mappings.exclude` | list of strings | no | [] | columns excluded from the sync | +| `table_mappings.columns` | list of objects | no | [] | ordering setting; for ClickHouse only | +| `table_mappings.columns.name` | string | yes | | name of the column | +| `table_mappings.columns.ordering` | number | yes | | rank of the column | +| `idle_timeout_seconds` | number | no | 60 | | +| `publication_name` | string | no | | will be created if not provided | +| `max_batch_size` | number | no | 1000000 | | +| `do_initial_snapshot` | boolean | yes | | | +| `snapshot_num_rows_per_partition` | number | no | 1000000 | only used for the initial snapshot | +| `snapshot_max_parallel_workers` | number | no | 4 | only used for the initial snapshot | +| `snapshot_num_tables_in_parallel` | number | no | 1 | only used for the initial snapshot | +| `resync` | boolean | no | false | the mirror **must be dropped** before re-syncing | +| `initial_snapshot_only` | boolean | no | false | | +| `soft_delete_col_name` | string | no | `_PEERDB_IS_DELETED` | | +| `synced_at_col_name` | string | no | `_PEERDB_SYNCED_AT` | | + +```sql +CREATE MIRROR IF NOT EXISTS some_cdc_mirror +FROM main_pg TO snowflake_prod -- FROM source_peer TO target_peer +WITH TABLE MAPPING +( + public.regions:main_pg.regions, -- source_schema.table:target_schema.table + { + from: 
public.countries, -- source_schema.table + to: main_pg.countries, -- target_schema.table + exclude: [ local_name, size, … ] -- column_1, …, column_N + }, + … +) +WITH ( do_initial_copy = true ); +``` + +```plaintext +POST /api/v1/flows/cdc/create +{ + "connection_configs": { + "flow_job_name": "some_cdc_mirror", + "source_name": "main_pg", + "destination_name": "snowflake_prod", + "do_initial_snapshot": true, + "table_mappings": [ + { + "source_table_identifier": "public.regions", + "destination_table_identifier": "main_pg.regions" + }, + { + "source_table_identifier": "public.countries", + "destination_table_identifier": "main_pg.countries", + "exclude": [ + "local_name", + "size", + … + ] + }, + … + ] + } +} +``` + +
+ +
+ Get status + +```plaintext +POST /api/v1/mirrors/status +{ + "flowJobName": "some_cdc_mirror" +} +``` + +
+ +
+ Show configuration + +```plaintext +POST /api/v1/mirrors/status +{ + "flowJobName": "some_cdc_mirror", + "includeFlowInfo": true +} +``` + +
+ +## Alerts + +Operations: + +
+ Create + +```plaintext +POST /api/v1/alerts/config +{ + "config": { + "id": -1, + "service_type": "slack", + "service_config": "{\"slot_lag_mb_alert_threshold\":15000,\"open_connections_alert_threshold\":20,\"auth_token\":\"xoxb-012345678901-0123456789012-1234ABcdEFGhijKLMnopQRST\",\"channel_ids\":[\"C01K23X4567\"]}", + "alert_for_mirrors": [ + "some_cdc_mirror", + "some_other_mirror" + ] + } +} +``` + +
+ +
+ Show configuration + +```plaintext +GET /api/v1/alerts/config +``` + +
+ +## Gotchas - The [documentation] is **sorely lacking**. @@ -33,7 +329,7 @@ Fast, simple, and cost effective Postgres replication. - API responses hide error messages behind a `200 OK` HTTP status code as of 2025-03-19. -
+
Response example Output of a `ansible.builtin.uri` Ansible task executed against the PeerDB server: @@ -52,11 +348,11 @@ Fast, simple, and cost effective Postgres replication.
-- PeerDB seems unable to connect to peers which `host` parameter is `localhost` or `127.0.0.1`, but can connect to the - IP address of the system running the service (e.g., `192.168.1.10`).
+- PeerDB seems **unable** to connect to peers whose `host` parameter is `localhost` or `127.0.0.1`, but **can** connect + to the IP address of the system running the service (e.g., `192.168.1.10`).
This is most likely a Docker-related issue. -
+
```sh $ docker run --rm --name 'postgres' -d -p '10000:5432' -e POSTGRES_PASSWORD='password' 'postgres:15.5' @@ -86,11 +382,11 @@ Fast, simple, and cost effective Postgres replication. - PostgreSQL peers do **not** accept connection options as of 2025-03-19.
This makes it impossible to specify any or override defaults. -
+
The connection string is composed in code.
- The [data structure specifying its parameters][peers.proto#PostgresConfig] does **not** accept options, **nor** explicit - connection strings. + The [data structure specifying its parameters][peers.proto#PostgresConfig] does **not** accept options, **nor** + explicit connection strings. ```go // https://github.com/PeerDB-io/peerdb/blob/6a591128908cbd76df8f7e4094ec838fac08dcda/protos/peers.proto#L73 @@ -112,7 +408,7 @@ Fast, simple, and cost effective Postgres replication. Peers seemingly **require** SSL to connect to them for some reason, or fail the password authentication when given the correct credentials. -
+
```sh $ nc -vz dblab.example.org 6005 @@ -135,90 +431,24 @@ Fast, simple, and cost effective Postgres replication.
-- When creating alerts through the APIs, the alert's ID in the request's data must be `-1`.
- This **will** create duplicates. - - SQL mode is provided by a translation service, which intercepts the `CREATE PEER` (or other resource) command and uses it to create the correct resources in the PostgreSQL backend.
The translator does **not** expose **all** the resources (e.g., I could find no alert configuration), **nor** allows for easy updates (e.g. the peers and mirrors data is encoded).
The data for peers and mirrors is encoded in ways that are **not** disclosed in the [documentation]. -
+- Newly created mirrors will start replication right away.
+ Unless explicitly specified otherwise in their definition, this usually means taking an initial snapshot of the mapped tables + from the source peer. -
- Setup +- When in the `snapshot` state, mirrors **cannot** be paused.
+ If stopped (e.g., by stopping, restarting, or killing the container), they **will break** and will need to be restarted. -
- Check requirements +- **Paused** mirrors using PostgreSQL peers as source will **not** consume the logical replication's transaction log, + which **will** blow up in size (depending on the number of changes made to the source DB). -```sql -sourceDb=> SELECT name,setting FROM pg_settings WHERE name IN ('wal_level','rds.logical_replication'); - name | setting --------------------------+--------- - rds.logical_replication | on - wal_level | logical -(2 rows) -``` - -```sql -ALTER SYSTEM SET wal_level = logical; -ALTER SYSTEM SET max_wal_senders = 10; -ALTER SYSTEM SET max_replication_slots = 10; -``` - -
- -```sh -git clone 'https://github.com/PeerDB-io/peerdb.git' \ -&& docker compose -f 'peerdb/docker-compose.yml' up -d -``` - -
- -
- Usage - -```sh -# Connect in SQL mode. -psql 'host=localhost port=9900 password=peerdb' -psql 'postgresql://peerdb.example.org:9900/?password=peerdb' - -# Use the APIs. -curl -fsS --url 'http://localhost:3000/api/v1/peers/list' -X 'GET' \ - -H "Authorization: Basic $(printf '%s' ':' 'your password here' | base64)" -``` - -```sql --- List peers. -SELECT id, name, type FROM peers; - --- Create peers. -CREATE PEER IF NOT EXISTS some_pg_peer FROM POSTGRES WITH ( - host='some.pg.fqdn', - port='5432', - database='postgres', - user='postgres', - password='password' -); - --- Delete peers. -DELETE FROM peers WHERE name == 'some_pg_peer'; -``` - -
- -
- Real world use cases - -```sh -# List peers. -psql "host=localhost port=9900 password=$(gopass show -o 'peerdb/instance')" -c "SELECT id, name, type FROM peers;" -curl -fsS --url 'http://localhost:3000/api/v1/peers/list' \ - -H "Authorization: Basic $(gopass show -o 'peerdb/instance' | xargs printf '%s' ':' | base64)" -``` - -
+- When creating alerts through the APIs, the alert's ID in the request's data must be `-1`.
+ This **will** create duplicates. ## Further readings @@ -230,6 +460,8 @@ curl -fsS --url 'http://localhost:3000/api/v1/peers/list' \ - [Public IPs For PeerDB Cloud] - [API Reference] +- [SQL reference] +- [Replication Slots] +[replication slots]: https://www.postgresql.org/docs/current/logicaldecoding-explanation.html#LOGICALDECODING-REPLICATION-SLOTS diff --git a/knowledge base/postgresql.md b/knowledge base/postgresql.md index fdf2c93..e9f197f 100644 --- a/knowledge base/postgresql.md +++ b/knowledge base/postgresql.md @@ -235,6 +235,7 @@ See also [yugabyte/yugabyte-db]. - [pgAdmin] - [How to Scale a Single-Server Database: A Guide to Distributed PostgreSQL] - [yugabyte/yugabyte-db] +- [Logical Decoding Concepts] ### Sources @@ -270,6 +271,7 @@ See also [yugabyte/yugabyte-db]. [create function]: https://www.postgresql.org/docs/current/sql-createfunction.html [database connection control functions]: https://www.postgresql.org/docs/current/libpq-connect.html [docker image]: https://github.com/docker-library/docs/blob/master/postgres/README.md +[logical decoding concepts]: https://www.postgresql.org/docs/current/logicaldecoding-explanation.html [pg_settings]: https://www.postgresql.org/docs/current/view-pg-settings.html [psql]: https://www.postgresql.org/docs/current/app-psql.html [the password file]: https://www.postgresql.org/docs/current/libpq-pgpass.html