@@ -222,31 +222,128 @@ GET _cat/shards?v=true

[discrete]
[[field-count-recommendation]]
-==== Data nodes should have at least 1kB of heap per field per index, plus overheads
-
-The exact resource usage of each mapped field depends on its type, but a rule
-of thumb is to allow for approximately 1kB of heap overhead per mapped field
-per index held by each data node. In a running cluster, you can also consult the
-<<cluster-nodes-stats,Nodes stats API>>'s `mappings` indices statistic, which
-reports the number of field mappings and an estimation of their heap overhead.
-
-Additionally, you must also allow enough heap for {es}'s
-baseline usage as well as your workload such as indexing, searches and
-aggregations. 0.5GB of extra heap will suffice for many reasonable workloads,
-and you may need even less if your workload is very light while heavy workloads
-may require more.
-
-For example, if a data node holds shards from 1000 indices, each containing
-4000 mapped fields, then you should allow approximately 1000 × 4000 × 1kB = 4GB
-of heap for the fields and another 0.5GB of heap for its workload and other
-overheads, and therefore this node will need a heap size of at least 4.5GB.
-
-Note that this rule defines the absolute maximum number of indices that a data
-node can manage, but does not guarantee the performance of searches or indexing
-involving this many indices. You must also ensure that your data nodes have
-adequate resources for your workload and that your overall sharding strategy
-meets all your performance requirements. See also <<single-thread-per-shard>>
-and <<each-shard-has-overhead>>.
+==== Allow enough heap for field mappers and overheads
+
+Mapped fields consume some heap memory on each node, and require extra
+heap on data nodes.
+Ensure each node has enough heap for mappings, and allow extra
+space for overheads associated with its workload. The following sections
+show how to determine these heap requirements.
+
+[discrete]
+===== Mapping metadata in the cluster state
+
+Each node in the cluster has a copy of the <<cluster-state-api-desc,cluster state>>.
+The cluster state includes information about the field mappings for
+each index. This information has heap overhead. You can use the
+<<cluster-stats,Cluster stats API>> to get the total size of all mappings, after
+deduplication and compression, which indicates this heap overhead.
+
+[source,console]
+----
+GET _cluster/stats?human&filter_path=indices.mappings.total_deduplicated_mapping_size*
+----
+// TEST[setup:node]
+
+This will show you information similar to this example output:
+
+[source,console-result]
+----
+{
+  "indices": {
+    "mappings": {
+      "total_deduplicated_mapping_size": "1gb",
+      "total_deduplicated_mapping_size_in_bytes": 1073741824
+    }
+  }
+}
+----
+// TESTRESPONSE[s/"total_deduplicated_mapping_size": "1gb"/"total_deduplicated_mapping_size": $body.$_path/]
+// TESTRESPONSE[s/"total_deduplicated_mapping_size_in_bytes": 1073741824/"total_deduplicated_mapping_size_in_bytes": $body.$_path/]
+
+[discrete]
+===== Retrieving heap size and field mapper overheads
+
+You can use the <<cluster-nodes-stats,Nodes stats API>> to get two relevant metrics
+for each node:
+
+* The size of the heap on each node.
+
+* The estimated additional heap overhead for the fields on each node. This is
+specific to data nodes: in addition to the cluster state field information
+mentioned above, each mapped field of every index held by the data node incurs
+extra heap overhead. For nodes which are not data nodes, this value may be zero.
+
+[source,console]
+----
+GET _nodes/stats?human&filter_path=nodes.*.name,nodes.*.indices.mappings.total_estimated_overhead*,nodes.*.jvm.mem.heap_max*
+----
+// TEST[setup:node]
+
+For each node, this will show you information similar to this example output:
+
+[source,console-result]
+----
+{
+  "nodes": {
+    "USpTGYaBSIKbgSUJR2Z9lg": {
+      "name": "node-0",
+      "indices": {
+        "mappings": {
+          "total_estimated_overhead": "1gb",
+          "total_estimated_overhead_in_bytes": 1073741824
+        }
+      },
+      "jvm": {
+        "mem": {
+          "heap_max": "4gb",
+          "heap_max_in_bytes": 4294967296
+        }
+      }
+    }
+  }
+}
+----
+// TESTRESPONSE[s/"USpTGYaBSIKbgSUJR2Z9lg"/\$node_name/]
+// TESTRESPONSE[s/"name": "node-0"/"name": $body.$_path/]
+// TESTRESPONSE[s/"total_estimated_overhead": "1gb"/"total_estimated_overhead": $body.$_path/]
+// TESTRESPONSE[s/"total_estimated_overhead_in_bytes": 1073741824/"total_estimated_overhead_in_bytes": $body.$_path/]
+// TESTRESPONSE[s/"heap_max": "4gb"/"heap_max": $body.$_path/]
+// TESTRESPONSE[s/"heap_max_in_bytes": 4294967296/"heap_max_in_bytes": $body.$_path/]
+
+[discrete]
+===== Consider additional heap overheads
+
+In addition to the two field overhead metrics above, you must also allow
+enough heap for {es}'s baseline usage and for your workload, such as indexing,
+searches and aggregations. 0.5GB of extra heap will suffice for many reasonable
+workloads; very light workloads may need even less, while heavy workloads may
+require more.
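+
+As a rough, illustrative sketch only (the function and parameter names here are
+made up for this example, and the 0.5GB headroom is a starting point rather than
+a guaranteed minimum), the heap budget for a data node can be expressed as:
+
+[source,python]
+----
+# Illustrative only: combine the metrics above into a rough heap budget.
+def required_heap_bytes(deduplicated_mapping_size_in_bytes,
+                        total_estimated_overhead_in_bytes,
+                        workload_headroom_in_bytes=512 * 1024 ** 2):
+    """Approximate heap a data node needs, in bytes."""
+    return (deduplicated_mapping_size_in_bytes
+            + total_estimated_overhead_in_bytes
+            + workload_headroom_in_bytes)
+----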
+
+[discrete]
+===== Example
+
+As an example, consider the outputs above for a data node. The heap of the node
+will need at least:
+
+* 1GB for the cluster state field information.
+
+* 1GB for the estimated additional heap overhead of the fields held by the data node.
+
+* 0.5GB of extra heap for other overheads.
+
+Since the node in this example has a maximum heap size of 4GB, it is sufficient
+for the total required heap of 2.5GB.
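+
+Expressed with the illustrative `required_heap_bytes` sketch from the previous
+section (using the values from the example outputs above):
+
+[source,python]
+----
+one_gb = 1024 ** 3
+
+# 1GB of deduplicated mappings + 1GB of estimated field overhead + 0.5GB headroom
+needed = required_heap_bytes(one_gb, one_gb)  # 2.5GB in total
+heap_max_in_bytes = 4 * one_gb                # the node's jvm.mem.heap_max
+
+assert needed <= heap_max_in_bytes            # the 4GB heap is sufficient
+----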
+
+If the maximum heap size of a node is not sufficient, consider
+<<avoid-unnecessary-fields,avoiding unnecessary fields>>,
+scaling up the cluster, or redistributing index shards.
+
+Note that the above rules do not necessarily guarantee the performance of
+searches or indexing involving a very high number of indices. You must also
+ensure that your data nodes have adequate resources for your workload and
+that your overall sharding strategy meets all your performance requirements.
+See also <<single-thread-per-shard>> and <<each-shard-has-overhead>>.

[discrete]
[[avoid-node-hotspots]]