From 78b2e3820fbeeddc06350b0342ded1b1a8eecb83 Mon Sep 17 00:00:00 2001 From: Michele Cereda Date: Wed, 2 Jul 2025 17:10:32 +0200 Subject: [PATCH] chore(opensearch): review and dump final findings --- .../cloud computing/aws/opensearch.md | 87 ++++++------- knowledge base/opensearch.md | 114 +++++++++--------- snippets/opensearch/api.txt | 29 ++++- snippets/opensearch/commands.fish | 10 +- 4 files changed, 138 insertions(+), 102 deletions(-) diff --git a/knowledge base/cloud computing/aws/opensearch.md b/knowledge base/cloud computing/aws/opensearch.md index b10bbe0..d379b13 100644 --- a/knowledge base/cloud computing/aws/opensearch.md +++ b/knowledge base/cloud computing/aws/opensearch.md @@ -6,9 +6,9 @@ Amazon offering for managed OpenSearch clusters. 1. [UltraWarm storage](#ultrawarm-storage) 1. [Cold storage](#cold-storage) 1. [Operations](#operations) - 1. [Migrate indexes to UltraWarm storage](#migrate-indexes-to-ultrawarm-storage) - 1. [Return warm indexes to hot storage](#return-warm-indexes-to-hot-storage) - 1. [Migrate indexes to Cold storage](#migrate-indexes-to-cold-storage) + 1. [Migrate indices to UltraWarm storage](#migrate-indices-to-ultrawarm-storage) + 1. [Return warm indices to hot storage](#return-warm-indices-to-hot-storage) + 1. [Migrate indices to Cold storage](#migrate-indices-to-cold-storage) 1. [Index state management plugin](#index-state-management-plugin) 1. [Snapshots](#snapshots) 1. [Best practices](#best-practices) @@ -19,27 +19,26 @@ Amazon offering for managed OpenSearch clusters. ## Storage -Clusters can be set up to use the [hot-warm architecture].\ -Compared to OpenSearch's, AWS' managed OpenSearch service offers the two extra `UltraWarm` and `Cold` storage options. - -_Hot_ storage provides the fastest possible performance for indexing and searching **new** data. +Clusters can be set up to use the [hot-warm architecture].
+Compared to the plain OpenSearch product, AWS' managed OpenSearch service offers the two extra `UltraWarm` and `Cold` +storage options. +_Hot_ storage provides the fastest possible performance for indexing and searching **new** data.
_Data_ nodes use **hot** storage in the form of instance stores or EBS volumes attached to each node. -Indexes that are **not** actively written to (e.g., immutable data like logs), that are queried less frequently, or that +Indices that are **not** actively written to (e.g., immutable data like logs), that are queried less frequently, or that don't need the hot storage's performance can be moved to _warm_ storage. -Warm indexes are **read-only** unless returned to hot storage.
+Warm indices are **read-only** unless returned to hot storage.
Aside from that, they behave like any other hot index. -_UltraWarm_ nodes use **warm** storage in the form of S3 and caching. +[_UltraWarm_][ultrawarm storage for amazon opensearch service] nodes use **warm** storage in the form of S3 and caching. _Cold_ storage is meant for data accessed only occasionally or no longer in active use.
-Cold indexes are normally detached from nodes and stored in S3, meaning one **can't** read from nor write to cold -indexes by default.
-Should one need to query them, one needs to selectively attach them to UltraWarm nodes. +Cold indices are normally detached from nodes and stored in S3, meaning one **can't** read from nor write to cold +indices by default. Should one need to query them, one needs to selectively attach them to UltraWarm nodes. -If using the [hot-warm architecture], leverage the [Index State Management plugin] to automate indexes migration to +If using the [hot-warm architecture], leverage the [Index State Management plugin] to automate indices migration to lower storage states after they meet specific conditions. ### UltraWarm storage @@ -60,13 +59,13 @@ Considerations: - When calculating UltraWarm storage requirements, consider only the size of the primary shards.
S3 removes the need for replicas and abstracts away any operating system or service considerations. - Dashboards and `_cat/indices` will still report UltraWarm index size as the _total_ of all primary and replica shards. -- There are [limits](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/limits.html#limits-ultrawarm) - to the amount of storage each instance type can address and the maximum number of warm nodes supported by Domains. +- There are [limits][ultrawarm storage quotas] to the amount of storage each instance type can address and the maximum + number of warm nodes supported by Domains. - Amazon recommends a maximum shard size of 50 GiB. - Upon enablement, UltraWarm might not be available to use for several hours even if the domain state is _Active_. - The minimum amount of UltraWarm instances allowed by AWS is 2. -> Before disabling UltraWarm, one **must** either delete **all** warm indexes or migrate them back to hot storage.
+> Before disabling UltraWarm, one **must** either delete **all** warm indices or migrate them back to hot storage.
> After warm storage is empty, wait five minutes before attempting to disable UltraWarm. ### Cold storage @@ -80,13 +79,13 @@ Requirements: Considerations: -- One **can't** read from nor write to cold indexes. +- One **can't** read from, nor write to, cold indices. ## Operations -### Migrate indexes to UltraWarm storage +### Migrate indices to UltraWarm storage -> Indexes' health **must** be green to perform migrations. +> Indices' health **must** be green to perform migrations. Migrations are executed one index at a time, sequentially.
There can be up to 200 migrations in the queue.
@@ -94,7 +93,7 @@ Any request that exceeds the limit will be rejected. > Index migrations to UltraWarm storage require a force merge operation, which purges documents that were marked for > deletion.
-> By default, UltraWarm merges indexes into one segment. One can set this value up to 1000. +> By default, UltraWarm merges indices into one segment. One can set this value up to 1000. Migrations might fail during snapshots, shard relocations, or force merges.
Failures during snapshots or shard relocation are typically due to node failures or S3 connectivity issues.
@@ -135,7 +134,7 @@ If a migration is in the queue but has not yet started, it can be removed from t POST _ultrawarm/migration/_cancel/my-index ``` -### Return warm indexes to hot storage +### Return warm indices to hot storage Migrate them back to hot storage: @@ -146,11 +145,11 @@ POST _ultrawarm/migration/my-index/_hot There can be up to 10 queued migrations from warm to hot storage at a time.
Migrations requests are processed one at a time in the order they were queued. -Indexes return to hot storage with **one** replica. +Indices return to hot storage with **one** replica. -### Migrate indexes to Cold storage +### Migrate indices to Cold storage -As for [UltraWarm storage][migrate indexes to ultrawarm storage], just change the endpoints accordingly: +As for [UltraWarm storage][migrate indices to ultrawarm storage], just change the endpoints accordingly: ```plaintext POST _ultrawarm/migration/my-index/_cold @@ -174,12 +173,12 @@ Compared to [OpenSearch] and [ElasticSearch], ISM for Amazon's managed OpenSearc - The managed OpenSearch service supports the three unique ISM operations `warm_migration`, `cold_migration`, and `cold_delete`. - If one's domain has [UltraWarm storage] enabled, the `warm_migration` action transitions indexes to warm storage.\ - If one's domain has [cold storage] enabled, the `cold_migration` action transitions indexes to cold storage, and the + If one's domain has [UltraWarm storage] enabled, the `warm_migration` action transitions indices to warm storage.
+ If one's domain has [cold storage] enabled, the `cold_migration` action transitions indices to cold storage, and the `cold_delete` action deletes them from cold storage. Should one of these actions not complete within the set timeout period, the migration or deletion of the affected - indexes will continue.\ + indices will continue.
Setting an `error_notification` for one of the above actions will send a notification about the action failing, should it not complete within the timeout period, but the notification is only for one's own reference. The actual operation has no inherent timeout, and will continue to run until it eventually succeeds or fails. @@ -189,16 +188,16 @@ Compared to [OpenSearch] and [ElasticSearch], ISM for Amazon's managed OpenSearc - \[should the domain run OpenSearch or Elasticsearch 7.7 or later] The managed OpenSearch service supports the ISM `snapshot` operation. -- Cold indexes API: +- Cold indices API: - Require specifying the `?type=_cold` parameter when you use the following ISM APIs: - Add policy - Remove policy - Update policy - Retry failed index - Explain index - - Do **not** support wildcard operators, except when used at the end of the path.\ + - Do **not** support wildcard operators, except when used at the end of the path.
I.E., `_plugins/_ism/add/logstash-*` is supported, but `_plugins/_ism/add/iad-*-prod` is not. - - Do **not** support multiple index names and patterns.\ + - Do **not** support multiple index names and patterns.
I.E., `_plugins/_ism/remove/app-logs` is supported, but `_plugins/_ism/remove/app-logs,sample-data` is not. - The managed OpenSearch service allows to change only the following ISM settings: @@ -211,10 +210,11 @@ Refer [Snapshots][opensearch snapshots] and [Creating index snapshots in Amazon AWS-managed OpenSearch Service snapshots come in the following forms: -- _Automated_ snapshots: only for cluster recovery, stored in a **preconfigured** S3 bucket at **no** additional cost.\ +- _Automated_ snapshots: only for cluster recovery, stored in a **preconfigured** S3 bucket at **no** additional + cost.
One can use them to restore the domain in the event of red cluster status or data loss. -- _Manual_ snapshots: for cluster recovery or moving data from one cluster to another.\ - Users must be those initiating manual snapshots.\ +- _Manual_ snapshots: for cluster recovery or moving data from one cluster to another.
+ Manual snapshots must be initiated by users.
These snapshots are stored in one's own S3 bucket. Standard S3 charges apply. All AWS-managed OpenSearch Service domains take automated snapshots, but with a frequency difference: @@ -232,10 +232,10 @@ To be able to create snapshots manually: - An S3 bucket must exist to store snapshots. > [!IMPORTANT] - > Manual snapshots do **not** support the S3 Glacier storage class.\ + > Manual snapshots do **not** support the S3 Glacier storage class.
> Do **not** apply any S3 Glacier lifecycle rule to this bucket. -- An IAM role that delegates permissions to the OpenSearch Service must be defined.\ +- An IAM role that delegates permissions to the OpenSearch Service must be defined.
This role must be able to act on the S3 bucket above.
@@ -327,7 +327,7 @@ To be able to create snapshots manually:
-Snapshots can be taken only from indices in the hot or warm storage tiers.\ +Snapshots can be taken only from indices in the hot or warm storage tiers.
Only **one** index from warm storage is allowed at a time, and the request **cannot** contain indices in mixed tiers. ## Best practices @@ -354,7 +354,7 @@ Cluster management tasks are: - Tracking all nodes in the cluster. - Maintaining routing information for nodes in the cluster. -- Tracking the number of indexes in the cluster. +- Tracking the number of indices in the cluster. - Tracking the number of shards belonging to each index. - Updating the cluster state after state changes.
I.e., creating an index and adding or removing nodes in the cluster. @@ -375,7 +375,7 @@ As such, an even number of dedicated master nodes are essentially equivalent to > both fail.
> This behavior differs from the OpenSearch default. -Master nodes size is highly correlated with the data instance size and the number of instances, indexes, and shards they +Master nodes size is highly correlated with the data instance size and the number of instances, indices, and shards they can manage. ## Cost-saving measures @@ -394,10 +394,10 @@ can manage. By default, AWS OpenSearch takes **daily** snapshots and retains them for **14 days**. - If using `gp2` EBS volumes, move to `gp3`. - Enable autoscaling (serverless only). -- Optimize indexes' sharding and replication. +- Optimize indices' sharding and replication. - Optimize queries. - Optimize data ingestion. -- Optimize indexes' mapping and settings. +- Optimize indices' mapping and settings. - Optimize the JVM heap size. - Summarize and compress historical data using [index rollups]. - Check out caches. @@ -438,7 +438,7 @@ can manage. [Cold storage]: #cold-storage [Index State Management plugin]: #index-state-management-plugin -[migrate indexes to ultrawarm storage]: #migrate-indexes-to-ultrawarm-storage +[migrate indices to ultrawarm storage]: #migrate-indices-to-ultrawarm-storage [ultrawarm storage]: #ultrawarm-storage @@ -453,7 +453,7 @@ can manage. 
[best practices for configuring your amazon opensearch service domain]: https://aws.amazon.com/blogs/big-data/best-practices-for-configuring-your-amazon-opensearch-service-domain/ -[cold storage for amazon opensearch service]: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/cold-storage.html +[Cold storage for amazon opensearch service]: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/cold-storage.html [Creating index snapshots in Amazon OpenSearch Service]: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/managedomains-snapshots.html [dedicated master nodes in amazon opensearch service]: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/managedomains-dedicatedmasternodes.html [how do i reduce the cost of using opensearch service domains?]: https://repost.aws/knowledge-center/opensearch-domain-pricing @@ -463,6 +463,7 @@ can manage. [or1 storage for amazon opensearch service]: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/or1.html [supported instance types in amazon opensearch service]: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/supported-instance-types.html [ultrawarm storage for amazon opensearch service]: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/ultrawarm.html +[UltraWarm storage quotas]: https://docs.aws.amazon.com/opensearch-service/latest/developerguide/limits.html#limits-ultrawarm [cost-saving strategies for aws opensearch(finops): optimize performance without breaking the bank]: https://ramchandra-vadranam.medium.com/cost-saving-strategies-for-aws-opensearch-finops-optimize-performance-without-breaking-the-bank-f87f0bb2ce37 diff --git a/knowledge base/opensearch.md b/knowledge base/opensearch.md index c8b4500..96660fa 100644 --- a/knowledge base/opensearch.md +++ b/knowledge base/opensearch.md @@ -9,12 +9,12 @@ Use cases: application search, log analytics, data observability, data ingestion 1. 
[TL;DR](#tldr) 1. [Concepts](#concepts) 1. [Node types](#node-types) - 1. [Indexes](#indexes) + 1. [Indices](#indices) 1. [Setup](#setup) 1. [The split brain problem](#the-split-brain-problem) 1. [Tuning](#tuning) 1. [Hot-warm architecture](#hot-warm-architecture) -1. [Manage indexes](#manage-indexes) +1. [Manage indices](#manage-indices) 1. [Index templates](#index-templates) 1. [Composable index templates](#composable-index-templates) 1. [Ingest data](#ingest-data) @@ -32,11 +32,11 @@ Use cases: application search, log analytics, data observability, data ingestion ## TL;DR _Documents_ are the unit storing information, consisting of text or structured data.
-Documents are stored in the JSON format, and returned when related information is searched for.
-Documents are immutable. However, they can be updated by retrieving them, updating the information in them, and -re-indexing them using the same document IDs. +They are stored in the JSON format, and returned when related information is searched for.
+The JSON file for a document is immutable, but documents can be updated by retrieving them, updating the information +they contain, and re-indexing them using the same document IDs. -[_Indexes_][indexes] are collections of documents.
+[_Indices_][indices] are collections of documents.
Their contents are queried when information is searched for. _Nodes_ are servers that store data and process search requests.
@@ -46,13 +46,13 @@ Multiple nodes can be aggregated into _Clusters_.
Clusters allow nodes to specialize for different responsibilities depending on their types. Each and every cluster **elects** a _cluster manager node_.
-Manager nodes orchestrate cluster-level operations (e.g., creating indexes). +Manager nodes orchestrate cluster-level operations (e.g., creating indices). Nodes in clusters communicate with each other.
When a request is routed to any node, that node sends requests to the others, gathers their responses, and returns the final response. -Indexes are split into _shards_, each of them storing a subset of all documents in an index.
+Indices are split into _shards_, each of them storing a subset of all documents in an index.
Shards are evenly distributed across nodes in a cluster.
Each shard is effectively a full [Lucene] index. Since each instance of Lucene is a running process consuming CPU and memory, having more shards is **not** necessarily better. @@ -65,7 +65,7 @@ so that replica shards would act as backups in the event of node failures.
Replicas also improve the speed at which the cluster processes search requests, encouraging the use of more than one replica per index for each search-heavy workload. -Indexes use a data structure called an _inverted index_. It maps words to the documents in which they occur.
+Indices use a data structure called an _inverted index_. It maps words to the documents in which they occur.
When searching, OpenSearch matches the words in the query to the words in the documents. Each document is assigned a _relevance score_ indicating how well the document matched the query. @@ -112,7 +112,7 @@ Flushing ensures that the data stored only in the translog is recorded in the [L Flushes are performed as needed to ensure that the translog does not grow too large. -Shards are [Lucene] indexes, which consist of segments (or segment files).
+Shards are [Lucene] indices, which consist of segments (or segment files).
Segments store the indexed data and are **immutable**. _Merge operations_ merge smaller segments into larger ones periodically.
@@ -123,14 +123,14 @@ _Merge policies_ specify the segments' maximum size and how often merge operatio Interaction with the cluster is done via REST [APIs]. -If indexes do not already exist, OpenSearch automatically creates them while [ingesting data][ingest data]. +If indices do not already exist, OpenSearch automatically creates them while [ingesting data][ingest data].
Typical setup order of operations 1. \[optional] Create [index templates]. 1. \[optional] Create [data streams]. -1. \[optional] Create [indexes]. +1. \[optional] Create [indices]. 1. [Ingest data]. 1. Create [index patterns] for the search dashboard to use. @@ -142,7 +142,7 @@ If indexes do not already exist, OpenSearch automatically creates them while [in | Node type | Description | Best practices for production | | ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| Cluster manager | Manages the overall operation of a cluster and keeps track of the cluster state.
This includes creating and deleting indexes, keeping track of the nodes that join and leave the cluster, checking the health of each node in the cluster (by running ping requests), and allocating shards to nodes. | Three dedicated cluster manager nodes in three different availability zones ensures the cluster never loses quorum.
Two nodes will be idle for most of the time, except when one node goes down or needs some maintenance. | +| Cluster manager | Manages the overall operation of a cluster and keeps track of the cluster state.
This includes creating and deleting indices, keeping track of the nodes that join and leave the cluster, checking the health of each node in the cluster (by running ping requests), and allocating shards to nodes. | Three dedicated cluster manager nodes in three different availability zones ensures the cluster never loses quorum.
Two nodes will be idle for most of the time, except when one node goes down or needs some maintenance. | | Cluster manager eligible | Elects one node among them as the cluster manager node through a voting process. | Make sure to have dedicated cluster manager nodes by marking all other node types as not cluster manager eligible. | | Data | Stores and searches data.
Performs all data-related operations (indexing, searching, aggregating) on local shards.
These are the worker nodes and need more disk space than any other node type. | Keep them balanced between zones.
Storage and RAM-heavy nodes are recommended. | | Ingest | Pre-processes data before storing it in the cluster.
Runs an ingest pipeline that transforms data before adding it to an index. | Use dedicated ingest nodes if you plan to ingest a lot of data and run complex ingest pipelines.
Optionally offload your indexing from the data nodes so that they are used exclusively for searching and aggregating. | @@ -161,9 +161,9 @@ After assessing all requirements, it is suggested to use benchmark testing tools Provision a small sample cluster and run tests with varying workloads and configurations. Compare and analyze the system and query metrics for these tests to improve upon the architecture. -### Indexes +### Indices -Indexes are collections of documents that one wants to make searchable.
+Indices are collections of documents that one wants to make searchable.
They organize the data for fast retrieval. To maximise one's ability to search and analyse documents, one can define how documents and their fields are stored and @@ -182,7 +182,7 @@ There are two _indexing APIs_: Enormous documents are still better indexed **individually**. -Within indexes, OpenSearch identifies each document using a **unique** _document ID_.
+Within indices, OpenSearch identifies each document using a **unique** _document ID_.
The document's `_id` must be **up to** 512 bytes in size.
Should one **not** provide an ID for the document during ingestion, OpenSearch generates a document ID itself. @@ -191,14 +191,14 @@ Upon receiving indexing requests, OpenSearch: 1. Creates an index if it does not exist already. 1. Stores the ingested document in that index. -Indexes must follow these naming restrictions: +Indices must follow these naming restrictions: - All letters must be **lowercase**. - Index names cannot begin with underscores (`_`) or hyphens (`-`). - Index names cannot contain spaces, commas, or the following characters: `:`, `"`, `*`, `+`, `/`, `\`, `|`, `?`, `#`, `>`, or `<`. -Indexes are configured with _mappings_ and _settings_: +Indices are configured with _mappings_ and _settings_: - Mappings are collections of fields and the types of those fields. - Settings include index data (i.e., the index name, creation date, and number of shards). @@ -215,7 +215,7 @@ Numbers are usually dynamically mapped to `long`.
Should one want to map them to the `date` type instead, one **will** need to delete the index, then recreate it by explicitly specifying the mappings. -_Static_ index settings can only be updated on **closed** indexes.
+_Static_ index settings can only be updated on **closed** indices.
_Dynamic_ index settings can be updated at any time through the [APIs]. ## Setup @@ -435,21 +435,21 @@ Refer [Elasticsearch Split Brain] and [Avoiding the Elasticsearch split brain pr Refer [Set up a hot-warm architecture]. -Enables using the [Index State Management plugin] to automate indexes migration to lower storage states after they meet +Enables using the [Index State Management plugin] to automate indices migration to lower storage states after they meet specific conditions. -## Manage indexes +## Manage indices -Refer [Managing indexes]. +Refer [Managing indices]. -If using the [hot-warm architecture], leverage the [Index State Management plugin] to automate indexes migration to +If using the [hot-warm architecture], leverage the [Index State Management plugin] to automate indices migration to lower storage states after they meet specific conditions. ## Index templates Refer [Index templates][documentation index templates]. -Index templates allow to initialize new indexes with predefined mappings and settings. +Index templates allow to initialize new indices with predefined mappings and settings. ### Composable index templates @@ -541,8 +541,8 @@ Refer [Reindex data]. The `_reindex` operation copies documents from an index, that one selects through a query, over to another index. -When needing to make an extensive change (e.g., adding a new field to every document, move documents between indexes, or -combining multiple indexes into a new one), one can use the `_reindex` operation instead of deleting the old indexes, +When needing to make an extensive change (e.g., adding a new field to every document, move documents between indices, or +combining multiple indices into a new one), one can use the `_reindex` operation instead of deleting the old indices, making the change offline, and then indexing the data again. Re-indexing can be an expensive operation depending on the size of the source index.
@@ -624,11 +624,11 @@ POST _reindex
- Combine indexes + Combine indices -Combine **all** documents from one or more indexes into another by adding the source indexes as a list. +Combine **all** documents from one or more indices into another by adding the source indices as a list. -> The number of shards for your source and destination indexes **must be the same**. +> The number of shards for your source and destination indices **must be the same**. ```plaintext POST _reindex @@ -655,8 +655,8 @@ and observability data in general). They work like any other index, but OpenSearch simplifies some management operations (e.g., rollovers) and stores them in a more efficient way. -They are internally composed of multiple _backing_ indexes.
-Search requests are routed to **all** backing indexes, while indexing requests are routed only to the **latest** write +They are internally composed of multiple _backing_ indices.
+Search requests are routed to **all** backing indices, while indexing requests are routed only to the **latest** write index. ISM policies allow to automatically handle index rollover or deletion. @@ -665,11 +665,11 @@ ISM policies allow to automatically handle index rollover or deletion. Create data streams 1. Create an index template containing `index_pattern: []` and `data_stream: {}`.
- This template will configure all indexes matching the defined patterns as a data stream. + This template will configure all indices matching the defined patterns as a data stream.
- Specifying the `data_stream` object causes the template to create data streams, and not just regular indexes. + Specifying the `data_stream` object causes the template to create data streams, and not just regular indices. ```plaintext PUT _index_template/logs-template @@ -706,8 +706,8 @@ ISM policies allow to automatically handle index rollover or deletion.
1. \[optional] Explicitly create the data stream.
- Since indexes are created with the first document they ingest, if they do not exist already, the data stream can be - created just by starting ingesting documents for the indexes matching its patterns. + Since indices are created with the first document they ingest, if they do not exist already, the data stream can be + created just by starting ingesting documents for the indices matching its patterns.
@@ -878,8 +878,8 @@ DELETE _data_stream/logs-nginx ## Index patterns -Index patterns reference one or more indexes, data streams, or index aliases.
-They are mostly used in dashboards and in the _discover_ tab to filter indexes to gather data from. +Index patterns reference one or more indices, data streams, or index aliases.
+They are mostly used in dashboards and in the _discover_ tab to filter indices to gather data from. They require data to be indexed before creation. @@ -890,12 +890,12 @@ They require data to be indexed before creation. 1. In the _Management_ section of the side menu, select _Dashboards Management_. 1. Select _Index patterns_, then _Create index pattern_. 1. Define the pattern by entering a name in the Index pattern name field.
- Dashboards automatically adds a wildcard (`*`). It will make the pattern match multiple sources or indexes. + Dashboards automatically adds a wildcard (`*`). It will make the pattern match multiple sources or indices. 1. Specify the time field to use when filtering documents on a time base.
Unless otherwise specified in the source or index properties, `@timestamp` will pop up in the dropdown menu. Should one **not** want to use a time filter, select that option from the dropdown menu.
- This will make OpenSearch return **all** the data in **all** the indexes that match the index pattern. + This will make OpenSearch return **all** the data in **all** the indices that match the index pattern. 1. Select _Create index pattern_. @@ -909,9 +909,9 @@ Refer [Index State Management][documentation index state management]. ## Snapshots -Backups of a cluster's indexes and state. +Backups of a cluster's indices and state. -Index snapshots include the affected indexes' data.\ +Index snapshots include the affected indices' data.\ State snapshots includes cluster settings, node information, index metadata (mappings, settings, or templates), and shard allocation. @@ -956,13 +956,13 @@ When taking snapshots, one must specify the name of the snapshot repository and
-This snapshot includes all indexes **and** the cluster's state: +This snapshot includes all indices **and** the cluster's state: ```plaintext PUT _snapshot/some-repository/1 ``` -Add a request body to include or exclude certain indexes, or specify other settings: +Add a request body to include or exclude certain indices, or specify other settings: ```plaintext PUT /_snapshot/my-repository/2 @@ -981,7 +981,7 @@ Check snapshots' progress with `GET _snapshot/_status`. ## APIs OpenSearch clusters offer a REST API.
-It allows almost everything - changing most settings, modify indexes, check cluster health, get statistics, etc. +It allows almost everything - changing most settings, modifying indices, checking cluster health, getting statistics, etc. One can interact with the API using every method that can send HTTP requests.
One can also send HTTP requests in the Dev Tools console in OpenSearch Dashboards. It uses a simpler syntax to format @@ -1256,7 +1256,7 @@ GET /students/_mapping
- Create indexes specifying their mappings + Create indices specifying their mappings ```plaintext PUT /students @@ -1288,9 +1288,9 @@ PUT /students
- Close indexes + Close indices -Disables read and write operations on the impacted indexes. +Disables read and write operations on the impacted indices. ```plaintext POST /prometheus-logs-20231205/_close @@ -1299,9 +1299,9 @@ POST /prometheus-logs-20231205/_close
- (Re)Open closed indexes + (Re)Open closed indices -Enables read and write operations on the impacted indexes. +Enables read and write operations on the impacted indices. ```plaintext POST /prometheus-logs-20231205/_open @@ -1310,9 +1310,9 @@ POST /prometheus-logs-20231205/_open
- Update indexes' settings + Update indices' settings -_Static_ settings can only be updated on **closed** indexes. +_Static_ settings can only be updated on **closed** indices. ```plaintext PUT /prometheus-logs-20231205/_settings @@ -1328,7 +1328,7 @@ PUT /prometheus-logs-20231205/_settings
- Delete indexes + Delete indices ```plaintext DELETE /students @@ -1383,11 +1383,11 @@ DELETE _snapshot/repository-name/snapshot-name - [Elasticsearch Index Lifecycle Management & Policy] - [Top 14 ELK alternatives in 2024] - [Stepping up for a truly open source Elasticsearch] -- [Managing indexes] +- [Managing indices] - [Reindex data] - [Index templates][documentation index templates] - [OpenSearch Data Streams] -- [OpenSearch Indexes and Data streams] +- [OpenSearch Indices and Data streams] - [Snapshot Operations in OpenSearch] @@ -1419,7 +1419,7 @@ DELETE _snapshot/repository-name/snapshot-name [documentation]: https://opensearch.org/docs/latest/ [index management]: https://opensearch.org/docs/latest/dashboards/im-dashboards/index-management/ [index settings]: https://opensearch.org/docs/latest/install-and-configure/configuring-opensearch/index-settings/ -[managing indexes]: https://opensearch.org/docs/latest/im-plugin/ +[managing indices]: https://opensearch.org/docs/latest/im-plugin/ [reindex data]: https://opensearch.org/docs/latest/im-plugin/reindex-data/ [rest api reference]: https://opensearch.org/docs/latest/api-reference/ [set up a hot-warm architecture]: https://opensearch.org/docs/latest/tuning-your-cluster/#advanced-step-7-set-up-a-hot-warm-architecture @@ -1438,7 +1438,7 @@ DELETE _snapshot/repository-name/snapshot-name [lucene]: https://lucene.apache.org/ [okapi bm25]: https://en.wikipedia.org/wiki/Okapi_BM25 [opensearch data streams]: https://opster.com/guides/opensearch/opensearch-machine-learning/opensearch-data-streams/ -[opensearch indexes and data streams]: https://stackoverflow.com/questions/75394622/opensearch-indexes-and-data-streams#75494264 +[opensearch indices and data streams]: https://stackoverflow.com/questions/75394622/opensearch-indexes-and-data-streams#75494264 [setting up hot-warm architecture for ism in opensearch]: https://opster.com/guides/opensearch/opensearch-data-architecture/setting-up-hot-warm-architecture-for-ism/ 
[stepping up for a truly open source elasticsearch]: https://aws.amazon.com/blogs/opensource/stepping-up-for-a-truly-open-source-elasticsearch/ [top 14 elk alternatives in 2024]: https://signoz.io/blog/elk-alternatives/ diff --git a/snippets/opensearch/api.txt b/snippets/opensearch/api.txt index 7560e7d..47106ff 100644 --- a/snippets/opensearch/api.txt +++ b/snippets/opensearch/api.txt @@ -1,7 +1,11 @@ # List indices +GET _list/indices +GET _list/indices/index-name-here?v +GET _list/indices/index1,index2,index3?v # in cold storage GET _cold/indices/_search -# in warm storage +# in hot or warm storage (aws-managed domains only) +GET _cat/indices/_hot GET _cat/indices/_warm # Search for indices in cold storage @@ -56,6 +60,29 @@ POST _reindex?pretty "dest": {"index": "destinationIndex"} } +# Close open indices +# disables read and write operations on the impacted index +POST /prometheus-logs-20231205/_close + +# Re-open closed indices +# re-enables read and write operations on the impacted index +POST /prometheus-logs-20231205/_open + +# Update indices' settings. +# static settings can only be updated on *closed* indices. 
+PUT /prometheus-logs-20231205/_settings +{ + "index": { + "codec": "zstd_no_dict", + "codec.compression_level": 3, + "refresh_interval": "2s" + } +} + +# Delete indices +# one at a time +DELETE /index-name-here + # Register snapshot repositories # aws-managed domains PUT _snapshot/repository-name-here diff --git a/snippets/opensearch/commands.fish b/snippets/opensearch/commands.fish index 3b6278a..89a0035 100644 --- a/snippets/opensearch/commands.fish +++ b/snippets/opensearch/commands.fish @@ -21,7 +21,8 @@ awscurl --service 'es' \ | jq -r '.indices[].index' - \ | tr '\n' ',' -# Migrate all indices from warm to hot storage +# Migrate all indices from ultrawarm to hot storage +# only aws-managed opensearch domains awscurl --service 'es' \ 'https://search-aws-domain-abcdefghijklmnopqrstuvwxyz.eu-west-1.es.amazonaws.com/_cat/indices/_warm' \ | grep 'app-cwl-' | sort | cut -d ' ' -f 3 \ @@ -35,6 +36,13 @@ seq 83 72 \ 'https://search-aws-domain-abcdefghijklmnopqrstuvwxyz.eu-west-1.es.amazonaws.com/_snapshot/repo/app-logs-0000%%' \ -d '{"indices": "app-logs-0000%%", "include_global_state": false}' +# Keep an eye on snapshots +watch -n '5' " \ + awscurl --service 'es' \ + 'https://search-aws-domain-abcdefghijklmnopqrstuvwxyz.eu-west-1.es.amazonaws.com/_snapshot/_status' \ + | jq '.snapshots[]?|{\"name\":.snapshot,\"state\":.state,\"shards\":.shards_stats}' - \ +" + # Delete indices that have been snapshotted awscurl --service 'es' \ 'https://search-aws-domain-abcdefghijklmnopqrstuvwxyz.eu-west-1.es.amazonaws.com/_snapshot/some-repo/some-snap' \