123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688 |
- [[docs-bulk]]
- === Bulk API
- ++++
- <titleabbrev>Bulk</titleabbrev>
- ++++
- Performs multiple indexing or delete operations in a single API call.
- This reduces overhead and can greatly increase indexing speed.
- [source,console]
- --------------------------------------------------
- POST _bulk
- { "index" : { "_index" : "test", "_id" : "1" } }
- { "field1" : "value1" }
- { "delete" : { "_index" : "test", "_id" : "2" } }
- { "create" : { "_index" : "test", "_id" : "3" } }
- { "field1" : "value3" }
- { "update" : {"_id" : "1", "_index" : "test"} }
- { "doc" : {"field2" : "value2"} }
- --------------------------------------------------
- [[docs-bulk-api-request]]
- ==== {api-request-title}
- `POST /_bulk`
- `POST /<target>/_bulk`
- [[docs-bulk-api-desc]]
- ==== {api-description-title}
- Provides a way to perform multiple `index`, `create`, `delete`, and `update` actions in a single request.
- The actions are specified in the request body using a newline delimited JSON (NDJSON) structure:
- [source,js]
- --------------------------------------------------
- action_and_meta_data\n
- optional_source\n
- action_and_meta_data\n
- optional_source\n
- ....
- action_and_meta_data\n
- optional_source\n
- --------------------------------------------------
- // NOTCONSOLE
- The `index` and `create` actions expect a source on the next line,
- and have the same semantics as the `op_type` parameter in the standard index API:
- `create` fails if a document with the same ID already exists in the target,
- `index` adds or replaces a document as necessary.
- NOTE: <<data-streams,Data streams>> support only the `create` action. To update
- or delete a document in a data stream, you must target the backing index
- containing the document. See <<update-delete-docs-in-a-backing-index>>.
- `update` expects that the partial doc, upsert,
- and script and its options are specified on the next line.
- `delete` does not expect a source on the next line and
- has the same semantics as the standard delete API.
- [NOTE]
- ====
- The final line of data must end with a newline character `\n`.
- Each newline character may be preceded by a carriage return `\r`.
- When sending requests to the `_bulk` endpoint,
- the `Content-Type` header should be set to `application/x-ndjson`.
- ====
- Because this format uses literal `\n`'s as delimiters,
- make sure that the JSON actions and sources are not pretty printed.
- If you provide a `<target>` in the request path,
- it is used for any actions that don't explicitly specify an `_index` argument.
- A note on the format: The idea here is to make processing of this as
- fast as possible. As some of the actions are redirected to other
- shards on other nodes, only `action_and_meta_data` is parsed on the
- receiving node side.
- Client libraries using this protocol should strive to do
- something similar on the client side, and reduce buffering as much as
- possible.
- There is no "correct" number of actions to perform in a single bulk request.
- Experiment with different settings to find the optimal size for your particular workload.
- When using the HTTP API, make sure that the client does not send HTTP chunks,
- as this will slow things down.
- [discrete]
- [[bulk-clients]]
- ===== Client support for bulk requests
- Some of the officially supported clients provide helpers to assist with
- bulk requests and reindexing:
- Go::
- See https://github.com/elastic/go-elasticsearch/tree/master/_examples/bulk#indexergo[esutil.BulkIndexer]
- Perl::
- See https://metacpan.org/pod/Search::Elasticsearch::Client::5_0::Bulk[Search::Elasticsearch::Client::5_0::Bulk]
- and https://metacpan.org/pod/Search::Elasticsearch::Client::5_0::Scroll[Search::Elasticsearch::Client::5_0::Scroll]
- Python::
- See https://elasticsearch-py.readthedocs.org/en/master/helpers.html[elasticsearch.helpers.*]
- JavaScript::
- See {jsclient-current}/client-helpers.html[client.helpers.*]
- .NET::
- See https://www.elastic.co/guide/en/elasticsearch/client/net-api/current/indexing-documents.html#bulkall-observable[`BulkAllObservable`]
- [discrete]
- [[bulk-curl]]
- ===== Submitting bulk requests with cURL
- If you're providing text file input to `curl`, you *must* use the
- `--data-binary` flag instead of plain `-d`. The latter doesn't preserve
- newlines. Example:
- [source,js]
- --------------------------------------------------
- $ cat requests
- { "index" : { "_index" : "test", "_id" : "1" } }
- { "field1" : "value1" }
- $ curl -s -H "Content-Type: application/x-ndjson" -XPOST localhost:9200/_bulk --data-binary "@requests"; echo
- {"took":7, "errors": false, "items":[{"index":{"_index":"test","_id":"1","_version":1,"result":"created","forced_refresh":false}}]}
- --------------------------------------------------
- // NOTCONSOLE
- // Not converting to console because this shows how curl works
- [discrete]
- [[bulk-optimistic-concurrency-control]]
- ===== Optimistic Concurrency Control
- Each `index` and `delete` action within a bulk API call may include the
- `if_seq_no` and `if_primary_term` parameters in their respective action
- and meta data lines. The `if_seq_no` and `if_primary_term` parameters control
- how operations are executed, based on the last modification to existing
- documents. See <<optimistic-concurrency-control>> for more details.
- [discrete]
- [[bulk-versioning]]
- ===== Versioning
- Each bulk item can include the version value using the
- `version` field. It automatically follows the behavior of the
- index / delete operation based on the `_version` mapping. It also
- supports the `version_type` (see <<index-versioning, versioning>>).
- [discrete]
- [[bulk-routing]]
- ===== Routing
- Each bulk item can include the routing value using the
- `routing` field. It automatically follows the behavior of the
- index / delete operation based on the `_routing` mapping.
- NOTE: Data streams do not support custom routing. Instead, target the
- appropriate backing index for the stream.
- [discrete]
- [[bulk-wait-for-active-shards]]
- ===== Wait For Active Shards
- When making bulk calls, you can set the `wait_for_active_shards`
- parameter to require a minimum number of shard copies to be active
- before starting to process the bulk request. See
- <<index-wait-for-active-shards,here>> for further details and a usage
- example.
- [discrete]
- [[bulk-refresh]]
- ===== Refresh
- Control when the changes made by this request are visible to search. See
- <<docs-refresh,refresh>>.
- NOTE: Only the shards that receive the bulk request will be affected by
- `refresh`. Imagine a `_bulk?refresh=wait_for` request with three
- documents in it that happen to be routed to different shards in an index
- with five shards. The request will only wait for those three shards to
- refresh. The other two shards that make up the index do not
- participate in the `_bulk` request at all.
- [discrete]
- [[bulk-security]]
- ===== Security
- See <<url-access-control>>.
- [[docs-bulk-api-path-params]]
- ==== {api-path-parms-title}
- `<target>`::
- (Optional, string)
- Name of the data stream, index, or index alias to perform bulk actions
- on.
- [[docs-bulk-api-query-params]]
- ==== {api-query-parms-title}
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=pipeline]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=refresh]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=routing]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=source]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=source_excludes]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=source_includes]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=timeout]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=wait_for_active_shards]
- [[bulk-api-request-body]]
- ==== {api-request-body-title}
- The request body contains a newline-delimited list of `create`, `delete`, `index`,
- and `update` actions and their associated source data.
- `create`::
- (Optional, string)
- Indexes the specified document if it does not already exist.
- The following line must contain the source data to be indexed.
- +
- --
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=bulk-index]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=bulk-id]
- --
- `delete`::
- (Optional, string)
- Removes the specified document from the index.
- +
- --
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=bulk-index]
- `_id`::
- (Required, string)
- The document ID.
- --
- `index`::
- (Optional, string)
- Indexes the specified document.
- If the document exists, replaces the document and increments the version.
- The following line must contain the source data to be indexed.
- +
- --
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=bulk-index]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=bulk-id]
- --
- `update`::
- (Optional, string)
- Performs a partial document update.
- The following line must contain the partial document and update options.
- +
- --
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=bulk-index]
- include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=bulk-id]
- --
- `doc`::
- (Optional, object)
- The partial document to index.
- Required for `update` operations.
- `<fields>`::
- (Optional, object)
- The document source to index.
- Required for `create` and `index` operations.
- [role="child_attributes"]
- [[bulk-api-response-body]]
- ==== {api-response-body-title}
- The bulk API's response contains the individual results of each operation in the
- request, returned in the order submitted. The success or failure of an
- individual operation does not affect other operations in the request.
- [[bulk-partial-responses]]
- .Partial responses
- ****
- To ensure fast responses, the bulk API will respond with partial results if one
- or more shards fail. See <<shard-failures, Shard failures>> for more
- information.
- ****
- `took`::
- (integer)
- How long, in milliseconds, it took to process the bulk request.
- `errors`::
- (boolean)
- If `true`, one or more of the operations in the bulk request did not complete
- successfully.
- `items`::
- (array of objects)
- Contains the result of each operation in the bulk request, in the order they
- were submitted.
- +
- .Properties of `items` objects
- [%collapsible%open]
- ====
- <action>::
- (object)
- The parameter name is an action associated with the operation. Possible values
- are `create`, `delete`, `index`, and `update`.
- +
- The parameter value is an object that contains information for the associated
- operation.
- +
- .Properties of `<action>`
- [%collapsible%open]
- =====
- `_index`::
- (string)
- Name of the index associated with the operation. If the operation targeted a
- data stream, this is the backing index into which the document was written.
- `_id`::
- (string)
- The document ID associated with the operation.
- `_version`::
- (integer)
- The document version associated with the operation. The document version is
- incremented each time the document is updated.
- +
- This parameter is only returned for successful operations.
- `result`::
- (string)
- Result of the operation. Successful values are `created`, `deleted`, and
- `updated`.
- +
- This parameter is only returned for successful operations.
- `_shards`::
- (object)
- Contains shard information for the operation.
- +
- This parameter is only returned for successful operations.
- +
- .Properties of `_shards`
- [%collapsible%open]
- ======
- `total`::
- (integer)
- Number of shards the operation attempted to execute on.
- `successful`::
- (integer)
- Number of shards the operation succeeded on.
- `failed`::
- (integer)
- Number of shards the operation attempted to execute on but failed.
- ======
- `_seq_no`::
- (integer)
- The sequence number assigned to the document for the operation.
- Sequence numbers are used to ensure an older version of a document
- doesn’t overwrite a newer version. See <<optimistic-concurrency-control-index>>.
- +
- This parameter is only returned for successful operations.
- `_primary_term`::
- (integer)
- The primary term assigned to the document for the operation.
- See <<optimistic-concurrency-control-index>>.
- +
- This parameter is only returned for successful operations.
- `status`::
- (integer)
- HTTP status code returned for the operation.
- `error`::
- (object)
- Contains additional information about the failed operation.
- +
- This parameter is only returned for failed operations.
- +
- .Properties of `error`
- [%collapsible%open]
- ======
- `type`::
- (string)
- Error type for the operation.
- `reason`::
- (string)
- Reason for the failed operation.
- `index_uuid`::
- (string)
- The universally unique identifier (UUID) of the index associated with the failed
- operation.
- `shard`::
- (string)
- ID of the shard associated with the failed operation.
- `index`::
- (string)
- Name of the index associated with the failed operation. If the operation
- targeted a data stream, this is the backing index into which the document was
- attempted to be written.
- ======
- =====
- ====
- [[docs-bulk-api-example]]
- ==== {api-examples-title}
- [source,console]
- --------------------------------------------------
- POST _bulk
- { "index" : { "_index" : "test", "_id" : "1" } }
- { "field1" : "value1" }
- { "delete" : { "_index" : "test", "_id" : "2" } }
- { "create" : { "_index" : "test", "_id" : "3" } }
- { "field1" : "value3" }
- { "update" : {"_id" : "1", "_index" : "test"} }
- { "doc" : {"field2" : "value2"} }
- --------------------------------------------------
- The API returns the following result:
- [source,console-result]
- --------------------------------------------------
- {
- "took": 30,
- "errors": false,
- "items": [
- {
- "index": {
- "_index": "test",
- "_id": "1",
- "_version": 1,
- "result": "created",
- "_shards": {
- "total": 2,
- "successful": 1,
- "failed": 0
- },
- "status": 201,
- "_seq_no" : 0,
- "_primary_term": 1
- }
- },
- {
- "delete": {
- "_index": "test",
- "_id": "2",
- "_version": 1,
- "result": "not_found",
- "_shards": {
- "total": 2,
- "successful": 1,
- "failed": 0
- },
- "status": 404,
- "_seq_no" : 1,
- "_primary_term" : 2
- }
- },
- {
- "create": {
- "_index": "test",
- "_id": "3",
- "_version": 1,
- "result": "created",
- "_shards": {
- "total": 2,
- "successful": 1,
- "failed": 0
- },
- "status": 201,
- "_seq_no" : 2,
- "_primary_term" : 3
- }
- },
- {
- "update": {
- "_index": "test",
- "_id": "1",
- "_version": 2,
- "result": "updated",
- "_shards": {
- "total": 2,
- "successful": 1,
- "failed": 0
- },
- "status": 200,
- "_seq_no" : 3,
- "_primary_term" : 4
- }
- }
- ]
- }
- --------------------------------------------------
- // TESTRESPONSE[s/"took": 30/"took": $body.took/]
- // TESTRESPONSE[s/"index_uuid": .../"index_uuid": $body.items.3.update.error.index_uuid/]
- // TESTRESPONSE[s/"_seq_no" : 0/"_seq_no" : $body.items.0.index._seq_no/]
- // TESTRESPONSE[s/"_primary_term" : 1/"_primary_term" : $body.items.0.index._primary_term/]
- // TESTRESPONSE[s/"_seq_no" : 1/"_seq_no" : $body.items.1.delete._seq_no/]
- // TESTRESPONSE[s/"_primary_term" : 2/"_primary_term" : $body.items.1.delete._primary_term/]
- // TESTRESPONSE[s/"_seq_no" : 2/"_seq_no" : $body.items.2.create._seq_no/]
- // TESTRESPONSE[s/"_primary_term" : 3/"_primary_term" : $body.items.2.create._primary_term/]
- // TESTRESPONSE[s/"_seq_no" : 3/"_seq_no" : $body.items.3.update._seq_no/]
- // TESTRESPONSE[s/"_primary_term" : 4/"_primary_term" : $body.items.3.update._primary_term/]
- [discrete]
- [[bulk-update]]
- ===== Bulk update example
- When using the `update` action, `retry_on_conflict` can be used as a field in
- the action itself (not in the extra payload line), to specify how many
- times an update should be retried in the case of a version conflict.
- The `update` action payload supports the following options: `doc`
- (partial document), `upsert`, `doc_as_upsert`, `script`, `params` (for
- script), `lang` (for script), and `_source`. See the update API documentation for
- details on the options. Example with update actions:
- [source,console]
- --------------------------------------------------
- POST _bulk
- { "update" : {"_id" : "1", "_index" : "index1", "retry_on_conflict" : 3} }
- { "doc" : {"field" : "value"} }
- { "update" : { "_id" : "0", "_index" : "index1", "retry_on_conflict" : 3} }
- { "script" : { "source": "ctx._source.counter += params.param1", "lang" : "painless", "params" : {"param1" : 1}}, "upsert" : {"counter" : 1}}
- { "update" : {"_id" : "2", "_index" : "index1", "retry_on_conflict" : 3} }
- { "doc" : {"field" : "value"}, "doc_as_upsert" : true }
- { "update" : {"_id" : "3", "_index" : "index1", "_source" : true} }
- { "doc" : {"field" : "value"} }
- { "update" : {"_id" : "4", "_index" : "index1"} }
- { "doc" : {"field" : "value"}, "_source": true}
- --------------------------------------------------
- [discrete]
- [[bulk-failures-ex]]
- ===== Example with failed actions
- The following bulk API request includes operations that update non-existent
- documents.
- [source,console]
- ----
- POST /_bulk
- { "update": {"_id": "5", "_index": "index1"} }
- { "doc": {"my_field": "foo"} }
- { "update": {"_id": "6", "_index": "index1"} }
- { "doc": {"my_field": "foo"} }
- { "create": {"_id": "7", "_index": "index1"} }
- { "my_field": "foo" }
- ----
- Because these operations cannot complete successfully, the API returns a
- response with an `errors` flag of `true`.
- The response also includes an `error` object for any failed operations. The
- `error` object contains additional information about the failure, such as the
- error type and reason.
- [source,console-result]
- ----
- {
- "took": 486,
- "errors": true,
- "items": [
- {
- "update": {
- "_index": "index1",
- "_id": "5",
- "status": 404,
- "error": {
- "type": "document_missing_exception",
- "reason": "[5]: document missing",
- "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA",
- "shard": "0",
- "index": "index1"
- }
- }
- },
- {
- "update": {
- "_index": "index1",
- "_id": "6",
- "status": 404,
- "error": {
- "type": "document_missing_exception",
- "reason": "[6]: document missing",
- "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA",
- "shard": "0",
- "index": "index1"
- }
- }
- },
- {
- "create": {
- "_index": "index1",
- "_id": "7",
- "_version": 1,
- "result": "created",
- "_shards": {
- "total": 2,
- "successful": 1,
- "failed": 0
- },
- "_seq_no": 0,
- "_primary_term": 1,
- "status": 201
- }
- }
- ]
- }
- ----
- // TESTRESPONSE[s/"took": 486/"took": $body.took/]
- // TESTRESPONSE[s/"_seq_no": 0/"_seq_no": $body.items.2.create._seq_no/]
- // TESTRESPONSE[s/"index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA"/"index_uuid": $body.$_path/]
- To return only information about failed operations, use the
- <<common-options-response-filtering,`filter_path`>> query parameter with an
- argument of `items.*.error`.
- [source,console]
- ----
- POST /_bulk?filter_path=items.*.error
- { "update": {"_id": "5", "_index": "index1"} }
- { "doc": {"my_field": "baz"} }
- { "update": {"_id": "6", "_index": "index1"} }
- { "doc": {"my_field": "baz"} }
- { "update": {"_id": "7", "_index": "index1"} }
- { "doc": {"my_field": "baz"} }
- ----
- // TEST[continued]
- The API returns the following result.
- [source,console-result]
- ----
- {
- "items": [
- {
- "update": {
- "error": {
- "type": "document_missing_exception",
- "reason": "[5]: document missing",
- "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA",
- "shard": "0",
- "index": "index1"
- }
- }
- },
- {
- "update": {
- "error": {
- "type": "document_missing_exception",
- "reason": "[6]: document missing",
- "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA",
- "shard": "0",
- "index": "index1"
- }
- }
- }
- ]
- }
- ----
- // TESTRESPONSE[s/"index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA"/"index_uuid": $body.$_path/]
|