buffer.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. // Package asyncbuffer provides an asynchronous buffer that reads data from an
  2. // io.Reader in the background.
  3. //
  4. // When created, AsyncBuffer starts reading from the upstream reader in the
  5. // background. If a read error occurs, it is stored and can be checked with
  6. // AsyncBuffer.Error().
  7. //
  8. // When reading through AsyncBuffer.Reader().Read(), the error is only returned
  9. // once the reader reaches the point where the error occurred. In other words,
  10. // errors are delayed until encountered by the reader.
  11. //
  12. // However, AsyncBuffer.Close() and AsyncBuffer.Error() will immediately return
  13. // any stored error, even if the reader has not yet reached the error point.
  14. package asyncbuffer
  15. import (
  16. "errors"
  17. "io"
  18. "sync"
  19. "sync/atomic"
  20. )
  21. const (
  22. // chunkSize is the size of each chunk in bytes
  23. chunkSize = 4096
  24. // pauseThreshold is the size of the file which is always read to memory. Data beyond the
  25. // threshold is read only if accessed. If not a multiple of chunkSize, the last chunk it points
  26. // to is read in full.
  27. pauseThreshold = 32768 // 32 KiB
  28. )
  29. // byteChunk is a struct that holds a buffer and the data read from the upstream reader
  30. // data slice is required since the chunk read may be smaller than ChunkSize
  31. type byteChunk struct {
  32. buf []byte
  33. data []byte
  34. }
  35. // chunkPool is a global sync.Pool that holds byteChunk objects for
  36. // all readers
  37. var chunkPool = sync.Pool{
  38. New: func() any {
  39. buf := make([]byte, chunkSize)
  40. return &byteChunk{
  41. buf: buf,
  42. data: buf[:0],
  43. }
  44. },
  45. }
  46. // AsyncBuffer is a wrapper around io.Reader that reads data in chunks
  47. // in background and allows reading from synchronously.
  48. type AsyncBuffer struct {
  49. r io.ReadCloser // Upstream reader
  50. chunks []*byteChunk // References to the chunks read from the upstream reader
  51. mu sync.RWMutex // Mutex on chunks slice
  52. err atomic.Value // Error that occurred during reading
  53. len atomic.Int64 // Total length of the data read
  54. finished atomic.Bool // Indicates that the buffer has finished reading
  55. closed atomic.Bool // Indicates that the buffer was closed
  56. paused *Latch // Paused buffer does not read data beyond threshold
  57. ticker *Ticker // Ticker that signals when a new chunk is ready
  58. }
  59. // FromReadCloser creates a new AsyncBuffer that reads from the given io.Reader in background
  60. func FromReader(r io.ReadCloser) *AsyncBuffer {
  61. ab := &AsyncBuffer{
  62. r: r,
  63. paused: NewLatch(),
  64. ticker: NewTicker(),
  65. }
  66. go ab.readChunks()
  67. return ab
  68. }
  69. // addChunk adds a new chunk to the AsyncBuffer, increments len and signals that a chunk is ready
  70. func (ab *AsyncBuffer) addChunk(chunk *byteChunk) {
  71. ab.mu.Lock()
  72. defer ab.mu.Unlock()
  73. if ab.closed.Load() {
  74. // If the reader is closed, we return the chunk to the pool
  75. chunkPool.Put(chunk)
  76. return
  77. }
  78. // Store the chunk, increase chunk size, increase length of the data read
  79. ab.chunks = append(ab.chunks, chunk)
  80. ab.len.Add(int64(len(chunk.data)))
  81. ab.ticker.Tick()
  82. }
  83. // finishAndCloseReader marks the reader as finished
  84. func (ab *AsyncBuffer) finishAndCloseReader() {
  85. ab.mu.Lock()
  86. defer ab.mu.Unlock()
  87. // Indicate that the reader has finished reading
  88. ab.finished.Store(true)
  89. ab.ticker.Close()
  90. // Close the upstream reader
  91. if err := ab.r.Close(); err != nil {
  92. ab.err.Store(err) // Store the error if it occurred during closing
  93. }
  94. }
  95. // readChunks reads data from the upstream reader in background and stores them in the pool
  96. func (ab *AsyncBuffer) readChunks() {
  97. defer ab.finishAndCloseReader()
  98. // Stop reading if the reader is finished
  99. for !ab.closed.Load() {
  100. // In case we are trying to read data beyond threshold and we are paused,
  101. // wait for pause to be released.
  102. if ab.len.Load() >= pauseThreshold {
  103. ab.paused.Wait()
  104. // If the reader has been closed while waiting, we can stop reading
  105. if ab.closed.Load() {
  106. return // No more data to read
  107. }
  108. }
  109. // Get a chunk from the pool
  110. // If the pool is empty, it will create a new byteChunk with ChunkSize
  111. chunk, ok := chunkPool.Get().(*byteChunk)
  112. if !ok {
  113. ab.err.Store(errors.New("asyncbuffer.AsyncBuffer.readChunks: failed to get chunk from pool"))
  114. return
  115. }
  116. // Read data into the chunk's buffer
  117. // There is no way to guarantee that this would
  118. n, err := io.ReadFull(ab.r, chunk.buf)
  119. // If it's not the EOF, we need to store the error
  120. if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
  121. ab.err.Store(err)
  122. chunkPool.Put(chunk)
  123. return
  124. }
  125. // No bytes were read (n == 0), we can return the chunk to the pool
  126. if err == io.EOF || n == 0 {
  127. chunkPool.Put(chunk)
  128. return
  129. }
  130. // Resize the chunk's data slice to the number of bytes read
  131. chunk.data = chunk.buf[:n]
  132. // Store the reference to the chunk in the AsyncBuffer
  133. ab.addChunk(chunk)
  134. // We got ErrUnexpectedEOF meaning that some bytes were read, but this is the
  135. // end of the stream, so we can stop reading
  136. if err == io.ErrUnexpectedEOF {
  137. return
  138. }
  139. }
  140. }
  141. // closedError returns an error if the attempt to read on a closed reader was made.
  142. // If the reader had an error, it returns that error instead.
  143. func (ab *AsyncBuffer) closedError() error {
  144. // If the reader is closed, we return the error or nil
  145. if !ab.closed.Load() {
  146. return nil
  147. }
  148. err := ab.Error()
  149. if err == nil {
  150. err = errors.New("asyncbuffer.AsyncBuffer.ReadAt: attempt to read on closed reader")
  151. }
  152. return err
  153. }
  154. // offsetAvailable checks if the data at the given offset is available for reading.
  155. // It may return io.EOF if the reader is finished reading and the offset is beyond the end of the stream.
  156. func (ab *AsyncBuffer) offsetAvailable(off int64) (bool, error) {
  157. // We can not read data from the closed reader, none
  158. if err := ab.closedError(); err != nil {
  159. return false, err
  160. }
  161. // In case the offset falls within the already read chunks, we can return immediately,
  162. // even if error has occurred in the future
  163. if off < ab.len.Load() {
  164. return true, nil
  165. }
  166. // In case the reader is finished reading, and we have not read enough
  167. // data yet, return either error or EOF
  168. if ab.finished.Load() {
  169. // In case, error has occurred, we need to return it
  170. if err := ab.Error(); err != nil {
  171. return false, err
  172. }
  173. // Otherwise, it's EOF if the offset is beyond the end of the stream
  174. return false, io.EOF
  175. }
  176. // No available data
  177. return false, nil
  178. }
  179. // WaitFor waits for the data to be ready at the given offset. nil means ok.
  180. // It guarantees that the chunk at the given offset is ready to be read.
  181. func (ab *AsyncBuffer) WaitFor(off int64) error {
  182. // In case we are trying to read data which would potentially hit the pause threshold,
  183. // we need to unpause the reader ASAP.
  184. if off >= pauseThreshold {
  185. ab.paused.Release()
  186. }
  187. for {
  188. ok, err := ab.offsetAvailable(off)
  189. if ok || err != nil {
  190. return err
  191. }
  192. ab.ticker.Wait()
  193. }
  194. }
  195. // Wait waits for the reader to finish reading all data and returns
  196. // the total length of the data read.
  197. func (ab *AsyncBuffer) Wait() (int64, error) {
  198. // Wait ends till the end of the stream: unpause the reader
  199. ab.paused.Release()
  200. for {
  201. // We can not read data from the closed reader
  202. if err := ab.closedError(); err != nil {
  203. return 0, err
  204. }
  205. // In case the reader is finished reading, we can return immediately
  206. if ab.finished.Load() {
  207. return ab.len.Load(), ab.Error()
  208. }
  209. // Lock until the next chunk is ready
  210. ab.ticker.Wait()
  211. }
  212. }
  213. // Error returns the error that occurred during reading data in background.
  214. func (ab *AsyncBuffer) Error() error {
  215. err := ab.err.Load()
  216. if err == nil {
  217. return nil
  218. }
  219. errCast, ok := err.(error)
  220. if !ok {
  221. return errors.New("asyncbuffer.AsyncBuffer.Error: failed to get error")
  222. }
  223. return errCast
  224. }
  225. // readChunkAt copies data from the chunk at the given absolute offset to the provided slice.
  226. // Chunk must be available when this method is called.
  227. // Returns the number of bytes copied to the slice or 0 if chunk has no data
  228. // (eg. offset is beyond the end of the stream).
  229. func (ab *AsyncBuffer) readChunkAt(p []byte, off int64) int {
  230. // If the chunk is not available, we return 0
  231. if off >= ab.len.Load() {
  232. return 0
  233. }
  234. ind := off / chunkSize // chunk index
  235. chunk := ab.chunks[ind]
  236. startOffset := off % chunkSize // starting offset in the chunk
  237. // If the offset in current chunk is greater than the data
  238. // it has, we return 0
  239. if startOffset >= int64(len(chunk.data)) {
  240. return 0
  241. }
  242. // Copy data to the target slice. The number of bytes to copy is limited by the
  243. // size of the target slice and the size of the data in the chunk.
  244. return copy(p, chunk.data[startOffset:])
  245. }
  246. // readAt reads data from the AsyncBuffer at the given offset.
  247. //
  248. // Please note that if pause threshold is hit in the middle of the reading,
  249. // the data beyond the threshold may not be available.
  250. //
  251. // If the reader is paused and we try to read data beyond the pause threshold,
  252. // it will wait till something could be returned.
  253. func (ab *AsyncBuffer) readAt(p []byte, off int64) (int, error) {
  254. size := int64(len(p)) // total size of the data to read
  255. if off < 0 {
  256. return 0, errors.New("asyncbuffer.AsyncBuffer.readAt: negative offset")
  257. }
  258. // If we plan to hit threshold while reading, release the paused reader
  259. if int64(len(p))+off > pauseThreshold {
  260. ab.paused.Release()
  261. }
  262. // Wait for the offset to be available.
  263. // It may return io.EOF if the offset is beyond the end of the stream.
  264. err := ab.WaitFor(off)
  265. if err != nil {
  266. return 0, err
  267. }
  268. // We lock the mutex until current buffer is read
  269. ab.mu.RLock()
  270. defer ab.mu.RUnlock()
  271. // If the reader is closed, we return an error
  272. if err := ab.closedError(); err != nil {
  273. return 0, err
  274. }
  275. // Read data from the first chunk
  276. n := ab.readChunkAt(p, off)
  277. if n == 0 {
  278. return 0, io.EOF // Failed to read any data: means we tried to read beyond the end of the stream
  279. }
  280. size -= int64(n)
  281. off += int64(n) // Here and beyond off always points to the last read byte + 1
  282. // Now, let's try to read the rest of the data from next chunks while they are available
  283. for size > 0 {
  284. // If data is not available at the given offset, we can return data read so far.
  285. ok, err := ab.offsetAvailable(off)
  286. if !ok || err != nil {
  287. return n, err
  288. }
  289. // Read data from the next chunk
  290. nX := ab.readChunkAt(p[n:], off)
  291. n += nX
  292. size -= int64(nX)
  293. off += int64(nX)
  294. // If we read data shorter than ChunkSize or, in case that was the last chunk, less than
  295. // the size of the tail, return kind of EOF
  296. if int64(nX) < min(size, int64(chunkSize)) {
  297. return n, io.EOF
  298. }
  299. }
  300. return n, nil
  301. }
  302. // Close closes the AsyncBuffer and releases all resources.
  303. // It returns an error if the reader was already closed or if there was
  304. // an error during reading data in background even if none of the subsequent
  305. // readers have reached the position where the error occurred.
  306. func (ab *AsyncBuffer) Close() error {
  307. ab.mu.Lock()
  308. defer ab.mu.Unlock()
  309. // If the reader is already closed, we return immediately error or nil
  310. if ab.closed.Load() {
  311. return ab.Error()
  312. } else {
  313. ab.closed.Store(true)
  314. }
  315. ab.finished.Store(true)
  316. // Return all chunks to the pool
  317. for _, chunk := range ab.chunks {
  318. chunkPool.Put(chunk)
  319. }
  320. // Release the paused latch so that no goroutines are waiting for it
  321. ab.paused.Release()
  322. return nil
  323. }
  324. // Reader returns an io.ReadSeeker+io.ReaderAt that can be used to read actual data from the AsyncBuffer
  325. func (ab *AsyncBuffer) Reader() *Reader {
  326. return &Reader{ab: ab, pos: 0}
  327. }