[[indices-recovery]]
== Indices Recovery

The indices recovery API provides insight into on-going index shard recoveries.
Recovery status may be reported for specific indices, or cluster-wide.

For example, the following command would show recovery information for the indices "index1" and "index2".

[source,js]
--------------------------------------------------
GET index1,index2/_recovery?human
--------------------------------------------------
// CONSOLE
// TEST[s/^/PUT index1\nPUT index2\n/]

To see cluster-wide recovery status, simply leave out the index names.

//////////////////////////
Here we create a repository and snapshot index1 in
order to restore it right after and print out the
indices recovery result.

[source,js]
--------------------------------------------------
# create the index
PUT index1
{"settings": {"index.number_of_shards": 1}}

# create the repository
PUT /_snapshot/my_repository
{"type": "fs","settings": {"location": "recovery_asciidoc" }}

# snapshot the index
PUT /_snapshot/my_repository/snap_1?wait_for_completion=true

# delete the index
DELETE index1

# and restore the snapshot
POST /_snapshot/my_repository/snap_1/_restore?wait_for_completion=true
--------------------------------------------------
// CONSOLE

[source,js]
--------------------------------------------------
{
  "snapshot": {
    "snapshot": "snap_1",
    "indices": [
      "index1"
    ],
    "shards": {
      "total": 1,
      "failed": 0,
      "successful": 1
    }
  }
}
--------------------------------------------------
// TESTRESPONSE
//////////////////////////

[source,js]
--------------------------------------------------
GET /_recovery?human
--------------------------------------------------
// CONSOLE
// TEST[continued]

Response:

[source,js]
--------------------------------------------------
{
  "index1" : {
    "shards" : [ {
      "id" : 0,
      "type" : "SNAPSHOT",
      "stage" : "INDEX",
      "primary" : true,
      "start_time" : "2014-02-24T12:15:59.716",
      "start_time_in_millis" : 1393244159716,
      "stop_time" : "0s",
      "stop_time_in_millis" : 0,
      "total_time" : "2.9m",
      "total_time_in_millis" : 175576,
      "source" : {
        "repository" : "my_repository",
        "snapshot" : "my_snapshot",
        "index" : "index1",
        "version" : "{version}"
      },
      "target" : {
        "id" : "ryqJ5lO5S4-lSFbGntkEkg",
        "host" : "my.fqdn",
        "transport_address" : "my.fqdn",
        "ip" : "10.0.1.7",
        "name" : "my_es_node"
      },
      "index" : {
        "size" : {
          "total" : "75.4mb",
          "total_in_bytes" : 79063092,
          "reused" : "0b",
          "reused_in_bytes" : 0,
          "recovered" : "65.7mb",
          "recovered_in_bytes" : 68891939,
          "percent" : "87.1%"
        },
        "files" : {
          "total" : 73,
          "reused" : 0,
          "recovered" : 69,
          "percent" : "94.5%"
        },
        "total_time" : "0s",
        "total_time_in_millis" : 0,
        "source_throttle_time" : "0s",
        "source_throttle_time_in_millis" : 0,
        "target_throttle_time" : "0s",
        "target_throttle_time_in_millis" : 0
      },
      "translog" : {
        "recovered" : 0,
        "total" : 0,
        "percent" : "100.0%",
        "total_on_start" : 0,
        "total_time" : "0s",
  115. "total_time_in_millis" : 0,
      },
      "verify_index" : {
        "check_index_time" : "0s",
        "check_index_time_in_millis" : 0,
        "total_time" : "0s",
        "total_time_in_millis" : 0
      }
    } ]
  }
}
--------------------------------------------------
// TESTRESPONSE[s/: (\-)?[0-9]+/: $body.$_path/]
// TESTRESPONSE[s/: "[^"]*"/: $body.$_path/]

////
The TESTRESPONSE above replaces all the field values with the expected ones in the test,
because we don't really care about the field values but we do want to check the field names.
////

The above response shows a single index recovering a single shard. In this case, the source of the recovery is a snapshot repository
and the target of the recovery is the node with name "my_es_node". Additionally, the output shows the number and percent of files
recovered, as well as the number and percent of bytes recovered.

In some cases a higher level of detail may be preferable. Setting "detailed=true" will present a list of physical files in recovery.

[source,js]
--------------------------------------------------
GET _recovery?human&detailed=true
--------------------------------------------------
// CONSOLE
// TEST[s/^/PUT index1\n{"settings": {"index.number_of_shards": 1}}\n/]

Response:

[source,js]
--------------------------------------------------
{
  "index1" : {
    "shards" : [ {
      "id" : 0,
      "type" : "STORE",
      "stage" : "DONE",
      "primary" : true,
      "start_time" : "2014-02-24T12:38:06.349",
  154. "start_time_in_millis" : "1393245486349",
  155. "stop_time" : "2014-02-24T12:38:08.464",
  156. "stop_time_in_millis" : "1393245488464",
  157. "total_time" : "2.1s",
  158. "total_time_in_millis" : 2115,
  159. "source" : {
  160. "id" : "RGMdRc-yQWWKIBM4DGvwqQ",
  161. "host" : "my.fqdn",
  162. "transport_address" : "my.fqdn",
  163. "ip" : "10.0.1.7",
  164. "name" : "my_es_node"
  165. },
  166. "target" : {
  167. "id" : "RGMdRc-yQWWKIBM4DGvwqQ",
  168. "host" : "my.fqdn",
  169. "transport_address" : "my.fqdn",
  170. "ip" : "10.0.1.7",
  171. "name" : "my_es_node"
  172. },
  173. "index" : {
  174. "size" : {
  175. "total" : "24.7mb",
  176. "total_in_bytes" : 26001617,
  177. "reused" : "24.7mb",
  178. "reused_in_bytes" : 26001617,
  179. "recovered" : "0b",
  180. "recovered_in_bytes" : 0,
  181. "percent" : "100.0%"
  182. },
  183. "files" : {
  184. "total" : 26,
  185. "reused" : 26,
  186. "recovered" : 0,
  187. "percent" : "100.0%",
  188. "details" : [ {
  189. "name" : "segments.gen",
  190. "length" : 20,
  191. "recovered" : 20
  192. }, {
  193. "name" : "_0.cfs",
  194. "length" : 135306,
  195. "recovered" : 135306
  196. }, {
  197. "name" : "segments_2",
  198. "length" : 251,
  199. "recovered" : 251
  200. }
  201. ]
  202. },
  203. "total_time" : "2ms",
  204. "total_time_in_millis" : 2,
  205. "source_throttle_time" : "0s",
  206. "source_throttle_time_in_millis" : 0,
  207. "target_throttle_time" : "0s",
  208. "target_throttle_time_in_millis" : 0
  209. },
  210. "translog" : {
  211. "recovered" : 71,
  212. "total" : 0,
  213. "percent" : "100.0%",
  214. "total_on_start" : 0,
  215. "total_time" : "2.0s",
  216. "total_time_in_millis" : 2025
  217. },
  218. "verify_index" : {
  219. "check_index_time" : 0,
  220. "check_index_time_in_millis" : 0,
  221. "total_time" : "88ms",
  222. "total_time_in_millis" : 88
  223. }
  224. } ]
  225. }
  226. }
  227. --------------------------------------------------
  228. // TESTRESPONSE[s/"source" : \{[^}]*\}/"source" : $body.$_path/]
  229. // TESTRESPONSE[s/"details" : \[[^\]]*\]//]
  230. // TESTRESPONSE[s/: (\-)?[0-9]+/: $body.$_path/]
  231. // TESTRESPONSE[s/: "[^"]*"/: $body.$_path/]
////
The TESTRESPONSE above replaces all the field values with the expected ones in the test,
because we don't really care about the field values but we do want to check the field names.
It also removes the "details" part, which is important in this doc but really hard to test.
////

This response shows a detailed listing (truncated for brevity) of the actual files recovered and their sizes.

Also shown are the timings in milliseconds of the various stages of recovery: index retrieval, translog replay, and index start time.

Note that the above listing indicates that the recovery is in stage "done". All recoveries, whether on-going or complete, are kept in
cluster state and may be reported on at any time. Setting "active_only=true" will cause only on-going recoveries to be reported (see
the example request after the options list below).

Here is a complete list of options:

[horizontal]
`detailed`:: Display a detailed view. This is primarily useful for viewing the recovery of physical index files. Default: false.
`active_only`:: Display only those recoveries that are currently on-going. Default: false.
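
For example, the following request is a minimal sketch that simply combines the `active_only` and `human` flags described above to
report only those recoveries that are still in progress:

[source,js]
--------------------------------------------------
GET _recovery?active_only=true&human
--------------------------------------------------
// CONSOLE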

Description of output fields:

[horizontal]
`id`:: Shard ID
`type`:: Recovery type:
* store
* snapshot
* replica
* relocating
`stage`:: Recovery stage:
* init: Recovery has not started
* index: Reading index meta-data and copying bytes from source to destination
* start: Starting the engine; opening the index for use
* translog: Replaying transaction log
* finalize: Cleanup
* done: Complete
`primary`:: True if shard is primary, false otherwise
`start_time`:: Timestamp of recovery start
`stop_time`:: Timestamp of recovery finish
`total_time_in_millis`:: Total time to recover shard in milliseconds
`source`:: Recovery source:
* repository description if recovery is from a snapshot
* description of source node otherwise
`target`:: Destination node
`index`:: Statistics about physical index recovery
`translog`:: Statistics about translog recovery
`start`:: Statistics about time to open and start the index
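
If only a handful of the fields above are of interest, the generic `filter_path` response-filtering parameter can also be combined
with this API; the particular field selection below is only an illustration:

[source,js]
--------------------------------------------------
GET _recovery?human&filter_path=*.shards.id,*.shards.stage,*.shards.index.size.percent
--------------------------------------------------
// CONSOLE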