  1. [[indices-recovery]]
  2. === Indices Recovery
  3. The indices recovery API provides insight into on-going index shard recoveries.
  4. Recovery status may be reported for specific indices, or cluster-wide.
  5. For example, the following command would show recovery information for the indices "index1" and "index2".
  6. [source,console]
  7. --------------------------------------------------
  8. GET index1,index2/_recovery?human
  9. --------------------------------------------------
  10. // TEST[s/^/PUT index1\nPUT index2\n/]
  11. To see cluster-wide recovery status simply leave out the index names.
  12. //////////////////////////
  13. Here we create a repository and snapshot index1 in
  14. order to restore it right after and prints out the
  15. indices recovery result.
  16. [source,console]
  17. --------------------------------------------------
  18. # create the index
  19. PUT index1
  20. {"settings": {"index.number_of_shards": 1}}
  21. # create the repository
  22. PUT /_snapshot/my_repository
  23. {"type": "fs","settings": {"location": "recovery_asciidoc" }}
  24. # snapshot the index
  25. PUT /_snapshot/my_repository/snap_1?wait_for_completion=true
  26. # delete the index
  27. DELETE index1
  28. # and restore the snapshot
  29. POST /_snapshot/my_repository/snap_1/_restore?wait_for_completion=true
  30. --------------------------------------------------
  31. [source,console-result]
  32. --------------------------------------------------
  33. {
  34. "snapshot": {
  35. "snapshot": "snap_1",
  36. "indices": [
  37. "index1"
  38. ],
  39. "shards": {
  40. "total": 1,
  41. "failed": 0,
  42. "successful": 1
  43. }
  44. }
  45. }
  46. --------------------------------------------------
  47. //////////////////////////
  48. [source,console]
  49. --------------------------------------------------
  50. GET /_recovery?human
  51. --------------------------------------------------
  52. // TEST[continued]
  53. Response:
  54. [source,console-result]
  55. --------------------------------------------------
  56. {
  57. "index1" : {
  58. "shards" : [ {
  59. "id" : 0,
  60. "type" : "SNAPSHOT",
  61. "stage" : "INDEX",
  62. "primary" : true,
  63. "start_time" : "2014-02-24T12:15:59.716",
  64. "start_time_in_millis": 1393244159716,
  65. "stop_time" : "0s",
  66. "stop_time_in_millis" : 0,
  67. "total_time" : "2.9m",
  68. "total_time_in_millis" : 175576,
  69. "source" : {
  70. "repository" : "my_repository",
  71. "snapshot" : "snap_1",
  72. "index" : "index1",
  73. "version" : "{version}",
  74. "restoreUUID": "PDh1ZAOaRbiGIVtCvZOMww"
  75. },
  76. "target" : {
  77. "id" : "ryqJ5lO5S4-lSFbGntkEkg",
  78. "host" : "my.fqdn",
  79. "transport_address" : "my.fqdn",
  80. "ip" : "10.0.1.7",
  81. "name" : "my_es_node"
  82. },
  83. "index" : {
  84. "size" : {
  85. "total" : "75.4mb",
  86. "total_in_bytes" : 79063092,
  87. "reused" : "0b",
  88. "reused_in_bytes" : 0,
  89. "recovered" : "65.7mb",
  90. "recovered_in_bytes" : 68891939,
  91. "percent" : "87.1%"
  92. },
  93. "files" : {
  94. "total" : 73,
  95. "reused" : 0,
  96. "recovered" : 69,
  97. "percent" : "94.5%"
  98. },
  99. "total_time" : "0s",
  100. "total_time_in_millis" : 0,
  101. "source_throttle_time" : "0s",
  102. "source_throttle_time_in_millis" : 0,
  103. "target_throttle_time" : "0s",
  104. "target_throttle_time_in_millis" : 0
  105. },
  106. "translog" : {
  107. "recovered" : 0,
  108. "total" : 0,
  109. "percent" : "100.0%",
  110. "total_on_start" : 0,
  111. "total_time" : "0s",
  112. "total_time_in_millis" : 0,
  113. },
  114. "verify_index" : {
  115. "check_index_time" : "0s",
  116. "check_index_time_in_millis" : 0,
  117. "total_time" : "0s",
  118. "total_time_in_millis" : 0
  119. }
  120. } ]
  121. }
  122. }
  123. --------------------------------------------------
  124. // TESTRESPONSE[s/: (\-)?[0-9]+/: $body.$_path/]
  125. // TESTRESPONSE[s/: "[^"]*"/: $body.$_path/]
  126. ////
  127. The TESTRESPONSE lines above replace all the field values with the expected ones in the test,
  128. because we don't really care about the field values but we want to check the field names.
  129. ////
  130. The above response shows a single index recovering a single shard. In this case, the source of the recovery is a snapshot repository
  131. and the target of the recovery is the node with name "my_es_node".
  132. Additionally, the output shows the number and percent of files recovered, as well as the number and percent of bytes recovered.
  133. In some cases a higher level of detail may be preferable. Setting "detailed=true" will present a list of physical files in recovery.
  134. [source,console]
  135. --------------------------------------------------
  136. GET _recovery?human&detailed=true
  137. --------------------------------------------------
  138. // TEST[s/^/PUT index1\n{"settings": {"index.number_of_shards": 1}}\n/]
  139. Response:
  140. [source,console-result]
  141. --------------------------------------------------
  142. {
  143. "index1" : {
  144. "shards" : [ {
  145. "id" : 0,
  146. "type" : "STORE",
  147. "stage" : "DONE",
  148. "primary" : true,
  149. "start_time" : "2014-02-24T12:38:06.349",
  150. "start_time_in_millis" : 1393245486349,
  151. "stop_time" : "2014-02-24T12:38:08.464",
  152. "stop_time_in_millis" : 1393245488464,
  153. "total_time" : "2.1s",
  154. "total_time_in_millis" : 2115,
  155. "source" : {
  156. "id" : "RGMdRc-yQWWKIBM4DGvwqQ",
  157. "host" : "my.fqdn",
  158. "transport_address" : "my.fqdn",
  159. "ip" : "10.0.1.7",
  160. "name" : "my_es_node"
  161. },
  162. "target" : {
  163. "id" : "RGMdRc-yQWWKIBM4DGvwqQ",
  164. "host" : "my.fqdn",
  165. "transport_address" : "my.fqdn",
  166. "ip" : "10.0.1.7",
  167. "name" : "my_es_node"
  168. },
  169. "index" : {
  170. "size" : {
  171. "total" : "24.7mb",
  172. "total_in_bytes" : 26001617,
  173. "reused" : "24.7mb",
  174. "reused_in_bytes" : 26001617,
  175. "recovered" : "0b",
  176. "recovered_in_bytes" : 0,
  177. "percent" : "100.0%"
  178. },
  179. "files" : {
  180. "total" : 26,
  181. "reused" : 26,
  182. "recovered" : 0,
  183. "percent" : "100.0%",
  184. "details" : [ {
  185. "name" : "segments.gen",
  186. "length" : 20,
  187. "recovered" : 20
  188. }, {
  189. "name" : "_0.cfs",
  190. "length" : 135306,
  191. "recovered" : 135306
  192. }, {
  193. "name" : "segments_2",
  194. "length" : 251,
  195. "recovered" : 251
  196. }
  197. ]
  198. },
  199. "total_time" : "2ms",
  200. "total_time_in_millis" : 2,
  201. "source_throttle_time" : "0s",
  202. "source_throttle_time_in_millis" : 0,
  203. "target_throttle_time" : "0s",
  204. "target_throttle_time_in_millis" : 0
  205. },
  206. "translog" : {
  207. "recovered" : 71,
  208. "total" : 0,
  209. "percent" : "100.0%",
  210. "total_on_start" : 0,
  211. "total_time" : "2.0s",
  212. "total_time_in_millis" : 2025
  213. },
  214. "verify_index" : {
  215. "check_index_time" : "0s",
  216. "check_index_time_in_millis" : 0,
  217. "total_time" : "88ms",
  218. "total_time_in_millis" : 88
  219. }
  220. } ]
  221. }
  222. }
  223. --------------------------------------------------
  224. // TESTRESPONSE[s/"source" : \{[^}]*\}/"source" : $body.$_path/]
  225. // TESTRESPONSE[s/"details" : \[[^\]]*\]/"details" : $body.$_path/]
  226. // TESTRESPONSE[s/: (\-)?[0-9]+/: $body.$_path/]
  227. // TESTRESPONSE[s/: "[^"]*"/: $body.$_path/]
  228. ////
  229. The TESTRESPONSE lines above replace all the field values with the expected ones in the test,
  230. because we don't really care about the field values but we want to check the field names.
  231. They also remove the "details" part, which is important in this doc but really hard to test.
  232. ////
  233. This response shows a detailed listing (truncated for brevity) of the actual files recovered and their sizes.
  234. Also shown are the timings in milliseconds of the various stages of recovery: index retrieval, translog replay, and index start time.
  235. Note that the above listing indicates that the recovery is in stage "done". All recoveries, whether on-going or complete, are kept in
  236. cluster state and may be reported on at any time. Setting "active_only=true" will cause only on-going recoveries to be reported.
  237. Here is a complete list of options:
  238. [horizontal]
  239. `detailed`:: Display a detailed view. This is primarily useful for viewing the recovery of physical index files. Default: false.
  240. `active_only`:: Display only those recoveries that are currently on-going. Default: false.
  241. Description of output fields:
  242. [horizontal]
  243. `id`:: Shard ID
  244. `type`:: Recovery type:
  245. * store
  246. * snapshot
  247. * replica
  248. * relocating
  249. `stage`:: Recovery stage:
  250. * init: Recovery has not started
  251. * index: Reading index meta-data and copying bytes from source to destination
  252. * start: Starting the engine; opening the index for use
  253. * translog: Replaying transaction log
  254. * finalize: Cleanup
  255. * done: Complete
  256. `primary`:: True if shard is primary, false otherwise
  257. `start_time`:: Timestamp of recovery start
  258. `stop_time`:: Timestamp of recovery finish
  259. `total_time_in_millis`:: Total time to recover shard in milliseconds
  260. `source`:: Recovery source:
  261. * repository description if recovery is from a snapshot
  262. * description of source node otherwise
  263. `target`:: Destination node
  264. `index`:: Statistics about physical index recovery
  265. `translog`:: Statistics about translog recovery
  266. `start`:: Statistics about time to open and start the index